1 /* ###################### CMSIS Support for Cortex-M4/7/33/35P/55 SIMD Instructions ####################### */
4 \defgroup intrinsic_SIMD_gr Intrinsic Functions for SIMD Instructions
5 \brief Access to dedicated SIMD instructions available on Armv7E-M (Cortex-M4/M7), Armv8-M Mainline
6 (Cortex-M33/M35P), and Armv8.1-M (Cortex-M55).
10 <b>Single Instruction Multiple Data (SIMD)</b> extensions are provided <b>only for cores that implement the DSP extension (Cortex-M4, Cortex-M7, Cortex-M33, Cortex-M35P, and Cortex-M55)</b>
11 to simplify development of application software. SIMD extensions increase the processing capability
12 without materially increasing the power consumption. The SIMD extensions are completely transparent
13 to the operating system (OS), allowing existing OS ports to be used.
17 - Simultaneous computation of 2x16-bit or 4x8-bit operands
18 - Fractional arithmetic
19 - User definable saturation modes (arbitrary word-width)
20 - Dual 16x16 multiply-add/subtract, 32x32 fractional MAC
21 - Simultaneous 8/16-bit select operations
22 - Performance up to 3.2 GOPS at 800MHz
23 - Performance is achieved with a "near zero" increase in power consumption on a typical implementation
27 \b Addition: Add two values using SIMD function
30 uint32_t add_halfwords(uint32_t val1, uint32_t val2)
32 return __SADD16(val1, val2);
38 \b Subtraction: Subtract two values using SIMD function
41 uint32_t sub_halfwords(uint32_t val1, uint32_t val2)
43 return __SSUB16(val1, val2);
48 \b Multiplication: Performing a multiplication using SIMD function
51 uint32_t dual_mul_add_products(uint32_t val1, uint32_t val2)
53 return __SMUAD(val1, val2);
61 /**************************************************************************************************/
63 \brief GE setting quad 8-bit signed addition
64 \details This function performs four 8-bit signed integer additions.
65 The GE bits of the APSR are set according to the results of the additions.
66 \param val1 first four 8-bit summands.
67 \param val2 second four 8-bit summands.
70 \li the addition of the first bytes from each operand, in the first byte of the return value.
71 \li the addition of the second bytes of each operand, in the second byte of the return value.
72 \li the addition of the third bytes of each operand, in the third byte of the return value.
73 \li the addition of the fourth bytes of each operand, in the fourth byte of the return value.
75 Each bit in APSR.GE is set or cleared for each byte in the return value, depending on
76 the results of the operation.
78 If \em res is the return value, then:
79 \li if res[7:0] \>= 0 then APSR.GE[0] = 1 else 0
80 \li if res[15:8] \>= 0 then APSR.GE[1] = 1 else 0
81 \li if res[23:16] \>= 0 then APSR.GE[2] = 1 else 0
82 \li if res[31:24] \>= 0 then APSR.GE[3] = 1 else 0
86 res[7:0] = val1[7:0] + val2[7:0]
87 res[15:8] = val1[15:8] + val2[15:8]
88 res[23:16] = val1[23:16] + val2[23:16]
89 res[31:24] = val1[31:24] + val2[31:24]
92 uint32_t __SADD8(uint32_t val1, uint32_t val2);
95 /**************************************************************************************************/
96 /** \ingroup intrinsic_SIMD_gr
97 \brief Q setting quad 8-bit saturating addition
98 \details This function enables you to perform four 8-bit integer additions, saturating the results to
99 the 8-bit signed integer range -2<sup>7</sup> \<= x \<= 2<sup>7</sup> - 1.
100 \param val1 first four 8-bit summands.
101 \param val2 second four 8-bit summands.
104 \li the saturated addition of the first byte of each operand in the first byte of the return value.
105 \li the saturated addition of the second byte of each operand in the second byte of the return value.
106 \li the saturated addition of the third byte of each operand in the third byte of the return value.
107 \li the saturated addition of the fourth byte of each operand in the fourth byte of the return value.
109 The returned results are saturated to the 8-bit signed integer range -2<sup>7</sup> \<= x \<= 2<sup>7</sup> - 1.
113 res[7:0] = val1[7:0] + val2[7:0]
114 res[15:8] = val1[15:8] + val2[15:8]
115 res[23:16] = val1[23:16] + val2[23:16]
116 res[31:24] = val1[31:24] + val2[31:24]
119 uint32_t __QADD8(uint32_t val1, uint32_t val2);
122 /**************************************************************************************************/
124 \brief Quad 8-bit signed addition with halved results
125 \details This function enables you to perform four signed 8-bit integer additions, halving the results.
126 \param val1 first four 8-bit summands.
127 \param val2 second four 8-bit summands.
130 \li the halved addition of the first bytes from each operand, in the first byte of the return value.
131 \li the halved addition of the second bytes from each operand, in the second byte of the return value.
132 \li the halved addition of the third bytes from each operand, in the third byte of the return value.
133 \li the halved addition of the fourth bytes from each operand, in the fourth byte of the return value.
137 res[7:0] = (val1[7:0] + val2[7:0]) >> 1
138 res[15:8] = (val1[15:8] + val2[15:8]) >> 1
139 res[23:16] = (val1[23:16] + val2[23:16]) >> 1
140 res[31:24] = (val1[31:24] + val2[31:24]) >> 1
143 uint32_t __SHADD8(uint32_t val1, uint32_t val2);
146 /**************************************************************************************************/
147 /** \ingroup intrinsic_SIMD_gr
148 \brief GE setting quad 8-bit unsigned addition
150 \details This function enables you to perform four unsigned 8-bit integer additions.
151 The GE bits of the APSR are set according to the results.
153 \param val1 first four 8-bit summands for each addition.
154 \param val2 second four 8-bit summands for each addition.
157 \li the addition of the first bytes from each operand, in the first byte of the return value.
158 \li the addition of the second bytes from each operand, in the second byte of the return value.
159 \li the addition of the third bytes from each operand, in the third byte of the return value.
160 \li the addition of the fourth bytes from each operand, in the fourth byte of the return value.
163 Each bit in APSR.GE is set or cleared for each byte in the return value, depending on the results of the operation.
166 If \em res is the return value, then:
167 \li if res[7:0] \>= 0x100 then APSR.GE[0] = 1 else 0
168 \li if res[15:8] \>= 0x100 then APSR.GE[1] = 1 else 0
169 \li if res[23:16] \>= 0x100 then APSR.GE[2] = 1 else 0
170 \li if res[31:24] \>= 0x100 then APSR.GE[3] = 1 else 0
174 res[7:0] = val1[7:0] + val2[7:0]
175 res[15:8] = val1[15:8] + val2[15:8]
176 res[23:16] = val1[23:16] + val2[23:16]
177 res[31:24] = val1[31:24] + val2[31:24]
180 uint32_t __UADD8(uint32_t val1, uint32_t val2);
183 /**************************************************************************************************/
185 \brief Quad 8-bit unsigned saturating addition
187 \details This function enables you to perform four unsigned 8-bit integer additions, saturating the
188 results to the 8-bit unsigned integer range 0 \<= x \<= 2<sup>8</sup> - 1.
190 \param val1 first four 8-bit summands.
191 \param val2 second four 8-bit summands.
194 \li the addition of the first bytes in each operand, in the first byte of the return value.
195 \li the addition of the second bytes in each operand, in the second byte of the return value.
196 \li the addition of the third bytes in each operand, in the third byte of the return value.
197 \li the addition of the fourth bytes in each operand, in the fourth byte of the return value.
200 The results are saturated to the 8-bit unsigned integer range 0 \<= x \<= 2<sup>8</sup> - 1.
204 res[7:0] = val1[7:0] + val2[7:0]
205 res[15:8] = val1[15:8] + val2[15:8]
206 res[23:16] = val1[23:16] + val2[23:16]
207 res[31:24] = val1[31:24] + val2[31:24]
210 uint32_t __UQADD8(uint32_t val1, uint32_t val2);
213 /**************************************************************************************************/
215 \brief Quad 8-bit unsigned addition with halved results
217 \details This function enables you to perform four unsigned 8-bit integer additions, halving the results.
219 \param val1 first four 8-bit summands.
220 \param val2 second four 8-bit summands.
223 \li the halved addition of the first bytes in each operand, in the first byte of the return value.
224 \li the halved addition of the second bytes in each operand, in the second byte of the return value.
225 \li the halved addition of the third bytes in each operand, in the third byte of the return value.
226 \li the halved addition of the fourth bytes in each operand, in the fourth byte of the return value.
230 res[7:0] = (val1[7:0] + val2[7:0]) >> 1
231 res[15:8] = (val1[15:8] + val2[15:8]) >> 1
232 res[23:16] = (val1[23:16] + val2[23:16]) >> 1
233 res[31:24] = (val1[31:24] + val2[31:24]) >> 1
236 uint32_t __UHADD8(uint32_t val1, uint32_t val2);
239 /**************************************************************************************************/
241 \brief GE setting quad 8-bit signed subtraction
243 \details This function enables you to perform four 8-bit signed integer subtractions.<br>
244 The GE bits in the APSR are set according to the results.
246 \param val1 first four 8-bit operands of each subtraction.
247 \param val2 second four 8-bit operands of each subtraction.
250 \li the subtraction of the first byte in the second operand from the first byte in the
251 first operand, in the first byte of the return value.
252 \li the subtraction of the second byte in the second operand from the second byte in
253 the first operand, in the second byte of the return value.
254 \li the subtraction of the third byte in the second operand from the third byte in the
255 first operand, in the third byte of the return value.
256 \li the subtraction of the fourth byte in the second operand from the fourth byte in
257 the first operand, in the fourth byte of the return value.
259 Each bit in APSR.GE is set or cleared for each byte in the return value, depending on
260 the results of the operation.
263 If \em res is the return value, then:
264 \li if res[7:0] \>= 0 then APSR.GE[0] = 1 else 0
265 \li if res[15:8] \>= 0 then APSR.GE[1] = 1 else 0
266 \li if res[23:16] \>= 0 then APSR.GE[2] = 1 else 0
267 \li if res[31:24] \>= 0 then APSR.GE[3] = 1 else 0
272 res[7:0] = val1[7:0] - val2[7:0]
273 res[15:8] = val1[15:8] - val2[15:8]
274 res[23:16] = val1[23:16] - val2[23:16]
275 res[31:24] = val1[31:24] - val2[31:24]
278 uint32_t __SSUB8(uint32_t val1, uint32_t val2);
281 /**************************************************************************************************/
283 \brief Q setting quad 8-bit saturating subtract
285 \details This function enables you to perform four 8-bit integer subtractions, saturating the results
286 to the 8-bit signed integer range -2<sup>7</sup> \<= x \<= 2<sup>7</sup> - 1.
288 \param val1 first four 8-bit operands.
289 \param val2 second four 8-bit operands.
292 \li the subtraction of the first byte in the second operand from the first byte in the
293 first operand, in the first byte of the return value.
294 \li the subtraction of the second byte in the second operand from the second byte in
295 the first operand, in the second byte of the return value.
296 \li the subtraction of the third byte in the second operand from the third byte in the
297 first operand, in the third byte of the return value.
298 \li the subtraction of the fourth byte in the second operand from the fourth byte in
299 the first operand, in the fourth byte of the return value.
302 The returned results are saturated to the 8-bit signed integer range -2<sup>7</sup> \<= x \<= 2<sup>7</sup> - 1.
307 res[7:0] = val1[7:0] - val2[7:0]
308 res[15:8] = val1[15:8] - val2[15:8]
309 res[23:16] = val1[23:16] - val2[23:16]
310 res[31:24] = val1[31:24] - val2[31:24]
313 uint32_t __QSUB8(uint32_t val1, uint32_t val2);
316 /**************************************************************************************************/
318 \brief Quad 8-bit signed subtraction with halved results
320 \details This function enables you to perform four signed 8-bit integer subtractions, halving the results.
323 \param val1 first four 8-bit operands.
324 \param val2 second four 8-bit operands.
327 \li the halved subtraction of the first byte in the second operand from the first byte in the
328 first operand, in the first byte of the return value.
329 \li the halved subtraction of the second byte in the second operand from the second byte in
330 the first operand, in the second byte of the return value.
331 \li the halved subtraction of the third byte in the second operand from the third byte in the
332 first operand, in the third byte of the return value.
333 \li the halved subtraction of the fourth byte in the second operand from the fourth byte in
334 the first operand, in the fourth byte of the return value.
338 res[7:0] = (val1[7:0] - val2[7:0]) >> 1
339 res[15:8] = (val1[15:8] - val2[15:8]) >> 1
340 res[23:16] = (val1[23:16] - val2[23:16]) >> 1
341 res[31:24] = (val1[31:24] - val2[31:24]) >> 1
344 uint32_t __SHSUB8(uint32_t val1, uint32_t val2);
347 /**************************************************************************************************/
349 \brief GE setting quad 8-bit unsigned subtract
351 \details This function enables you to perform four 8-bit unsigned integer subtractions.
352 The GE bits in the APSR are set according to the results.
354 \param val1 first four 8-bit operands.
355 \param val2 second four 8-bit operands.
358 \li the subtraction of the first byte in the second operand from the first byte in the
359 first operand, in the first byte of the return value.
360 \li the subtraction of the second byte in the second operand from the second byte in
361 the first operand, in the second byte of the return value.
362 \li the subtraction of the third byte in the second operand from the third byte in the
363 first operand, in the third byte of the return value.
364 \li the subtraction of the fourth byte in the second operand from the fourth byte in
365 the first operand, in the fourth byte of the return value.
368 Each bit in APSR.GE is set or cleared for each byte in the return value, depending on
369 the results of the operation.
372 If \em res is the return value, then:
373 \li if res[7:0] \>= 0 then APSR.GE[0] = 1 else 0
374 \li if res[15:8] \>= 0 then APSR.GE[1] = 1 else 0
375 \li if res[23:16] \>= 0 then APSR.GE[2] = 1 else 0
376 \li if res[31:24] \>= 0 then APSR.GE[3] = 1 else 0
381 res[7:0] = val1[7:0] - val2[7:0]
382 res[15:8] = val1[15:8] - val2[15:8]
383 res[23:16] = val1[23:16] - val2[23:16]
384 res[31:24] = val1[31:24] - val2[31:24]
387 uint32_t __USUB8(uint32_t val1, uint32_t val2);
390 /**************************************************************************************************/
392 \brief Quad 8-bit unsigned saturating subtraction
394 \details This function enables you to perform four unsigned 8-bit integer subtractions, saturating
395 the results to the 8-bit unsigned integer range 0 \<= x \<= 2<sup>8</sup> - 1.
397 \param val1 first four 8-bit operands.
398 \param val2 second four 8-bit operands.
401 \li the subtraction of the first byte in the second operand from the first byte in the
402 first operand, in the first byte of the return value.
403 \li the subtraction of the second byte in the second operand from the second byte in
404 the first operand, in the second byte of the return value.
405 \li the subtraction of the third byte in the second operand from the third byte in the
406 first operand, in the third byte of the return value.
407 \li the subtraction of the fourth byte in the second operand from the fourth byte in
408 the first operand, in the fourth byte of the return value.
411 The results are saturated to the 8-bit unsigned integer range 0 \<= x \<= 2<sup>8</sup> - 1.
416 res[7:0] = val1[7:0] - val2[7:0]
417 res[15:8] = val1[15:8] - val2[15:8]
418 res[23:16] = val1[23:16] - val2[23:16]
419 res[31:24] = val1[31:24] - val2[31:24]
422 uint32_t __UQSUB8(uint32_t val1, uint32_t val2);
425 /**************************************************************************************************/
427 \brief Quad 8-bit unsigned subtraction with halved results
429 \details This function enables you to perform four unsigned 8-bit integer subtractions, halving the results.
432 \param val1 first four 8-bit operands.
433 \param val2 second four 8-bit operands.
436 \li the halved subtraction of the first byte in the second operand from the first byte in the
437 first operand, in the first byte of the return value.
438 \li the halved subtraction of the second byte in the second operand from the second byte in
439 the first operand, in the second byte of the return value.
440 \li the halved subtraction of the third byte in the second operand from the third byte in the
441 first operand, in the third byte of the return value.
442 \li the halved subtraction of the fourth byte in the second operand from the fourth byte in
443 the first operand, in the fourth byte of the return value.
447 res[7:0] = (val1[7:0] - val2[7:0]) >> 1
448 res[15:8] = (val1[15:8] - val2[15:8]) >> 1
449 res[23:16] = (val1[23:16] - val2[23:16]) >> 1
450 res[31:24] = (val1[31:24] - val2[31:24]) >> 1
453 uint32_t __UHSUB8(uint32_t val1, uint32_t val2);
456 /**************************************************************************************************/
458 \brief GE setting dual 16-bit signed addition
460 \details This function enables you to perform two 16-bit signed integer additions.<br>
461 The GE bits in the APSR are set according to the results of the additions.
463 \param val1 first two 16-bit summands.
464 \param val2 second two 16-bit summands.
467 \li the addition of the low halfwords in the low halfword of the return value.
468 \li the addition of the high halfwords in the high halfword of the return value.
471 Each bit in APSR.GE is set or cleared for each byte in the return value, depending on
472 the results of the operation.
474 If \em res is the return value, then:
475 \li if res[15:0] \>= 0 then APSR.GE[1:0] = 11 else 00
476 \li if res[31:16] \>= 0 then APSR.GE[3:2] = 11 else 00
481 res[15:0] = val1[15:0] + val2[15:0]
482 res[31:16] = val1[31:16] + val2[31:16]
485 uint32_t __SADD16(uint32_t val1, uint32_t val2);
488 /**************************************************************************************************/
490 \brief Q setting dual 16-bit saturating addition
492 \details This function enables you to perform two 16-bit integer arithmetic additions in parallel,
493 saturating the results to the 16-bit signed integer range -2<sup>15</sup> \<= x \<= 2<sup>15</sup> - 1.
495 \param val1 first two 16-bit summands.
496 \param val2 second two 16-bit summands.
499 \li the saturated addition of the low halfwords, in the low halfword of the return value.
500 \li the saturated addition of the high halfwords, in the high halfword of the return value.
503 The returned results are saturated to the 16-bit signed integer
504 range -2<sup>15</sup> \<= x \<= 2<sup>15</sup> - 1
508 res[15:0] = val1[15:0] + val2[15:0]
509 res[31:16] = val1[31:16] + val2[31:16]
512 uint32_t __QADD16(uint32_t val1, uint32_t val2);
515 /**************************************************************************************************/
517 \brief Dual 16-bit signed addition with halved results
519 \details This function enables you to perform two signed 16-bit integer additions, halving the results.
522 \param val1 first two 16-bit summands.
523 \param val2 second two 16-bit summands.
526 \li the halved addition of the low halfwords, in the low halfword of the return value.
527 \li the halved addition of the high halfwords, in the high halfword of the return value.
531 res[15:0] = (val1[15:0] + val2[15:0]) >> 1
532 res[31:16] = (val1[31:16] + val2[31:16]) >> 1
535 uint32_t __SHADD16(uint32_t val1, uint32_t val2);
538 /**************************************************************************************************/
540 \brief GE setting dual 16-bit unsigned addition
542 \details This function enables you to perform two 16-bit unsigned integer additions.<br>
543 The GE bits in the APSR are set according to the results.
545 \param val1 first two 16-bit summands for each addition.
546 \param val2 second two 16-bit summands for each addition.
549 \li the addition of the low halfwords in each operand, in the low halfword of the return value.
551 \li the addition of the high halfwords in each operand, in the high halfword of the return value.
555 Each bit in APSR.GE is set or cleared for each byte in the return value, depending on
556 the results of the operation.
558 If \em res is the return value, then:
559 \li if res[15:0] \>= 0x10000 then APSR.GE[1:0] = 11 else 00
560 \li if res[31:16] \>= 0x10000 then APSR.GE[3:2] = 11 else 00
564 res[15:0] = val1[15:0] + val2[15:0]
565 res[31:16] = val1[31:16] + val2[31:16]
568 uint32_t __UADD16(uint32_t val1, uint32_t val2);
571 /**************************************************************************************************/
573 \brief Dual 16-bit unsigned saturating addition
575 \details This function enables you to perform two unsigned 16-bit integer additions, saturating the
576 results to the 16-bit unsigned integer range 0 \<= x \<= 2<sup>16</sup> - 1.
578 \param val1 first two 16-bit summands.
579 \param val2 second two 16-bit summands.
582 \li the addition of the low halfword in the first operand and the low halfword in the
583 second operand, in the low halfword of the return value.
584 \li the addition of the high halfword in the first operand and the high halfword in the
585 second operand, in the high halfword of the return value.
588 The results are saturated to the 16-bit unsigned integer
589 range 0 \<= x \<= 2<sup>16</sup> - 1.
593 res[15:0] = val1[15:0] + val2[15:0]
594 res[31:16] = val1[31:16] + val2[31:16]
597 uint32_t __UQADD16(uint32_t val1, uint32_t val2);
600 /**************************************************************************************************/
602 \brief Dual 16-bit unsigned addition with halved results
604 \details This function enables you to perform two unsigned 16-bit integer additions, halving the results.
607 \param val1 first two 16-bit summands.
608 \param val2 second two 16-bit summands.
611 \li the halved addition of the low halfwords in each operand, in the low halfword of the return value.
613 \li the halved addition of the high halfwords in each operand, in the high halfword of the return value.
618 res[15:0] = (val1[15:0] + val2[15:0]) >> 1
619 res[31:16] = (val1[31:16] + val2[31:16]) >> 1
622 uint32_t __UHADD16(uint32_t val1, uint32_t val2);
625 /**************************************************************************************************/
627 \brief GE setting dual 16-bit signed subtraction
629 \details This function enables you to perform two 16-bit signed integer subtractions.<br>
630 The GE bits in the APSR are set according to the results.
632 \param val1 first two 16-bit operands of each subtraction.
633 \param val2 second two 16-bit operands of each subtraction.
636 \li the subtraction of the low halfword in the second operand from the low halfword
637 in the first operand, in the low halfword of the return value.
638 \li the subtraction of the high halfword in the second operand from the high halfword
639 in the first operand, in the high halfword of the return value.
642 Each bit in APSR.GE is set or cleared for each byte in the return value, depending on
643 the results of the operation.
645 If \em res is the return value, then:
646 \li if res[15:0] \>= 0 then APSR.GE[1:0] = 11 else 00
647 \li if res[31:16] \>= 0 then APSR.GE[3:2] = 11 else 00
652 res[15:0] = val1[15:0] - val2[15:0]
653 res[31:16] = val1[31:16] - val2[31:16]
656 uint32_t __SSUB16(uint32_t val1, uint32_t val2);
659 /**************************************************************************************************/
661 \brief Q setting dual 16-bit saturating subtract
663 \details This function enables you to perform two 16-bit integer subtractions, saturating the
664 results to the 16-bit signed integer range -2<sup>15</sup> \<= x \<= 2<sup>15</sup> - 1.
666 \param val1 first two 16-bit operands.
667 \param val2 second two 16-bit operands.
670 \li the saturated subtraction of the low halfword in the second operand from the low
671 halfword in the first operand, in the low halfword of the returned result.
672 \li the saturated subtraction of the high halfword in the second operand from the high
673 halfword in the first operand, in the high halfword of the returned result.
676 The returned results are saturated to the 16-bit signed integer
677 range -2<sup>15</sup> \<= x \<= 2<sup>15</sup> - 1.
681 res[15:0] = val1[15:0] - val2[15:0]
682 res[31:16] = val1[31:16] - val2[31:16]
685 uint32_t __QSUB16(uint32_t val1, uint32_t val2);
688 /**************************************************************************************************/
690 \brief Dual 16-bit signed subtraction with halved results
692 \details This function enables you to perform two signed 16-bit integer subtractions, halving the results.
695 \param val1 first two 16-bit operands.
696 \param val2 second two 16-bit operands.
699 \li the halved subtraction of the low halfword in the second operand from the low
700 halfword in the first operand, in the low halfword of the returned result.
701 \li the halved subtraction of the high halfword in the second operand from the high
702 halfword in the first operand, in the high halfword of the returned result.
707 res[15:0] = (val1[15:0] - val2[15:0]) >> 1
708 res[31:16] = (val1[31:16] - val2[31:16]) >> 1
711 uint32_t __SHSUB16(uint32_t val1, uint32_t val2);
714 /**************************************************************************************************/
716 \brief GE setting dual 16-bit unsigned subtract
718 \details This function enables you to perform two 16-bit unsigned integer subtractions.<br>
719 The GE bits in the APSR are set according to the results.
721 \param val1 first two 16-bit operands.
722 \param val2 second two 16-bit operands.
725 \li the subtraction of the low halfword in the second operand from the low halfword
726 in the first operand, in the low halfword of the return value.
727 \li the subtraction of the high halfword in the second operand from the high halfword
728 in the first operand, in the high halfword of the return value.
731 Each bit in APSR.GE is set or cleared for each byte in the return value, depending on
732 the results of the operation.
735 If \em res is the return value, then:
736 \li if res[15:0] \>= 0 then APSR.GE[1:0] = 11 else 00
737 \li if res[31:16] \>= 0 then APSR.GE[3:2] = 11 else 00
741 res[15:0] = val1[15:0] - val2[15:0]
742 res[31:16] = val1[31:16] - val2[31:16]
745 uint32_t __USUB16(uint32_t val1, uint32_t val2);
748 /**************************************************************************************************/
750 \brief Dual 16-bit unsigned saturating subtraction
752 \details This function enables you to perform two unsigned 16-bit integer subtractions, saturating
753 the results to the 16-bit unsigned integer range 0 \<= x \<= 2<sup>16</sup> - 1.
755 \param val1 first two 16-bit operands for each subtraction.
756 \param val2 second two 16-bit operands for each subtraction.
759 \li the subtraction of the low halfword in the second operand from the low halfword
760 in the first operand, in the low halfword of the return value.
761 \li the subtraction of the high halfword in the second operand from the high halfword
762 in the first operand, in the high halfword of the return value.
765 The results are saturated to the 16-bit unsigned integer range 0 \<= x \<= 2<sup>16</sup> - 1.
770 res[15:0] = val1[15:0] - val2[15:0]
771 res[31:16] = val1[31:16] - val2[31:16]
774 uint32_t __UQSUB16(uint32_t val1, uint32_t val2);
777 /**************************************************************************************************/
779 \brief Dual 16-bit unsigned subtraction with halved results
781 \details This function enables you to perform two unsigned 16-bit integer subtractions, halving the results.
784 \param val1 first two 16-bit operands.
785 \param val2 second two 16-bit operands.
788 \li the halved subtraction of the low halfword in the second operand from the low halfword
789 in the first operand, in the low halfword of the return value.
790 \li the halved subtraction of the high halfword in the second operand from the high halfword
791 in the first operand, in the high halfword of the return value.
796 res[15:0] = (val1[15:0] - val2[15:0]) >> 1
797 res[31:16] = (val1[31:16] - val2[31:16]) >> 1
800 uint32_t __UHSUB16(uint32_t val1, uint32_t val2);
803 /**************************************************************************************************/
805 \brief GE setting dual 16-bit addition and subtraction with exchange
807 \details This function inserts an SASX instruction into the instruction stream generated by the
808 compiler. It enables you to exchange the halfwords of the second operand, add the high
809 halfwords and subtract the low halfwords.<br>
810 The GE bits in the APSR are set according to the results.
812 \param val1 first operand for the subtraction in the low halfword, and the
813 first operand for the addition in the high halfword.
814 \param val2 second operand for the subtraction in the high halfword, and the
815 second operand for the addition in the low halfword.
818 \li the subtraction of the high halfword in the second operand from the low halfword
819 in the first operand, in the low halfword of the return value.
820 \li the addition of the high halfword in the first operand and the low halfword in the
821 second operand, in the high halfword of the return value.
824 Each bit in APSR.GE is set or cleared for each byte in the return value, depending on
825 the results of the operation.
827 If \em res is the return value, then:
828 \li if res[15:0] \>= 0 then APSR.GE[1:0] = 11 else 00
829 \li if res[31:16] \>= 0 then APSR.GE[3:2] = 11 else 00
833 res[15:0] = val1[15:0] - val2[31:16]
834 res[31:16] = val1[31:16] + val2[15:0]
837 uint32_t __SASX(uint32_t val1, uint32_t val2);
840 /**************************************************************************************************/
842 \brief Q setting dual 16-bit add and subtract with exchange
844 \details This function enables you to exchange the halfwords of the second operand, then add the high
845 halfwords and subtract the low halfwords, saturating the results to the 16-bit signed
846 integer range -2<sup>15</sup> \<= x \<= 2<sup>15</sup> - 1.
848 \param val1 first operand for the subtraction in the low halfword, and the
849 first operand for the addition in the high halfword.
850 \param val2 second operand for the subtraction in the high halfword, and the
851 second operand for the addition in the low halfword.
854 \li the saturated subtraction of the high halfword in the second operand from the low
855 halfword in the first operand, in the low halfword of the return value.
856 \li the saturated addition of the high halfword in the first operand and the low
857 halfword in the second operand, in the high halfword of the return value.
860 The returned results are saturated to the 16-bit signed integer
861 range -2<sup>15</sup> \<= x \<= 2<sup>15</sup> - 1.
865 res[15:0] = val1[15:0] - val2[31:16]
866 res[31:16] = val1[31:16] + val2[15:0]
869 uint32_t __QASX(uint32_t val1, uint32_t val2);
872 /**************************************************************************************************/
874 \brief Dual 16-bit signed addition and subtraction with halved results
876 \details This function enables you to exchange the two halfwords of one operand, perform one
877 signed 16-bit integer addition and one signed 16-bit subtraction, and halve the results.
879 \param val1 first 16-bit operands.
880 \param val2 second 16-bit operands.
883 \li the halved subtraction of the high halfword in the second operand from the low
884 halfword in the first operand, in the low halfword of the return value.
885 \li the halved addition of the low halfword in the second operand and the high
886 halfword in the first operand, in the high halfword of the return value.
890 res[15:0] = (val1[15:0] - val2[31:16]) >> 1
891 res[31:16] = (val1[31:16] + val2[15:0] ) >> 1
894 uint32_t __SHASX(uint32_t val1, uint32_t val2);
897 /**************************************************************************************************/
899 \brief GE setting dual 16-bit unsigned addition and subtraction with exchange
901 \details This function enables you to exchange the two halfwords of the second operand, add the
902 high halfwords and subtract the low halfwords.<br>
903 The GE bits in the APSR are set according to the results.
905 \param val1 first operand for the subtraction in the low halfword, and the
906 first operand for the addition in the high halfword.
907 \param val2 second operand for the subtraction in the high halfword and the
908 second operand for the addition in the low halfword.
911 \li the subtraction of the high halfword in the second operand from the low halfword
912 in the first operand, in the low halfword of the return value.
913 \li the addition of the high halfword in the first operand and the low halfword in the
914 second operand, in the high halfword of the return value.
917 Each bit in APSR.GE is set or cleared for each byte in the return value, depending on
918 the results of the operation.
920 \par If \em res is the return value, then:
921 \li if res[15:0] \>= 0 then APSR.GE[1:0] = 11 else 00
922 \li if res[31:16] \>= 0x10000 then APSR.GE[3:2] = 11 else 00
926 res[15:0] = val1[15:0] - val2[31:16]
927 res[31:16] = val1[31:16] + val2[15:0]
930 uint32_t __UASX(uint32_t val1, uint32_t val2);
933 /**************************************************************************************************/
935 \brief Dual 16-bit unsigned saturating addition and subtraction with exchange
937 \details This function enables you to exchange the halfwords of the second operand and perform
938 one unsigned 16-bit integer addition and one unsigned 16-bit subtraction, saturating the
939 results to the 16-bit unsigned integer range 0 \<= x \<= 2<sup>16</sup> - 1.
941 \param val1 first two 16-bit operands.
942 \param val2 second two 16-bit operands.
945 \li the subtraction of the high halfword in the second operand from the low halfword
946 in the first operand, in the low halfword of the return value.
947 \li the addition of the high halfword in the first operand and the low halfword
948 in the second operand, in the high halfword of the return value.
951 The results are saturated to the 16-bit unsigned integer
952 range 0 \<= x \<= 2<sup>16</sup> - 1.
956 res[15:0] = val1[15:0] - val2[31:16]
957 res[31:16] = val1[31:16] + val2[15:0]
960 uint32_t __UQASX(uint32_t val1, uint32_t val2);
963 /**************************************************************************************************/
965 \brief Dual 16-bit unsigned addition and subtraction with halved results and exchange
967 \details This function enables you to exchange the halfwords of the second operand, add the high
968 halfwords and subtract the low halfwords, halving the results.
970 \param val1 first operand for the subtraction in the low halfword, and the
971 first operand for the addition in the high halfword.
972 \param val2 second operand for the subtraction in the high halfword, and the
973 second operand for the addition in the low halfword.
976 \li the halved subtraction of the high halfword in the second operand from the low
977 halfword in the first operand.
978 \li the halved addition of the high halfword in the first operand and the low halfword
979 in the second operand.
984 res[15:0] = (val1[15:0] - val2[31:16]) >> 1
985 res[31:16] = (val1[31:16] + val2[15:0] ) >> 1
988 uint32_t __UHASX(uint32_t val1, uint32_t val2);
991 /**************************************************************************************************/
993 \brief GE setting dual 16-bit signed subtraction and addition with exchange
995 \details This function enables you to exchange the two halfwords of one operand and perform one
996 16-bit integer subtraction and one 16-bit addition.<br>
997 The GE bits in the APSR are set according to the results.
999 \param val1 first operand for the addition in the low halfword, and the first
1000 operand for the subtraction in the high halfword.
1001 \param val2 second operand for the addition in the high halfword, and the
1002 second operand for the subtraction in the low halfword.
1005 \li the addition of the low halfword in the first operand and the high halfword in the
1006 second operand, in the low halfword of the return value.
1007 \li the subtraction of the low halfword in the second operand from the high halfword
1008 in the first operand, in the high halfword of the return value.
1010 Each bit in APSR.GE is set or cleared for each byte in the return value, depending on
1011 the results of the operation.
1013 If \em res is the return value, then:
1014 \li if res[15:0] \>= 0 then APSR.GE[1:0] = 11 else 00
1015 \li if res[31:16] \>= 0 then APSR.GE[3:2] = 11 else 00
1019 res[15:0] = val1[15:0] + val2[31:16]
1020 res[31:16] = val1[31:16] - val2[15:0]
1023 uint32_t __SSAX(uint32_t val1, uint32_t val2);
1026 /**************************************************************************************************/
1028 \brief Q setting dual 16-bit subtract and add with exchange
1030 \details This function enables you to exchange the halfwords of one operand, then subtract the
1031 high halfwords and add the low halfwords, saturating the results to the 16-bit signed
1032 integer range -2<sup>15</sup> \<= x \<= 2<sup>15</sup> - 1.
1034 \param val1 first operand for the addition in the low halfword, and the first
1035 operand for the subtraction in the high halfword.
1036 \param val2 second operand for the addition in the high halfword, and the
1037 second operand for the subtraction in the low halfword.
1040 \li the saturated addition of the low halfword of the first operand and the high
1041 halfword of the second operand, in the low halfword of the return value.
1042 \li the saturated subtraction of the low halfword of the second operand from the high
1043 halfword of the first operand, in the high halfword of the return value.
1045 The returned results are saturated to the 16-bit signed integer
1046 range -2<sup>15</sup> \<= x \<= 2<sup>15</sup> - 1.
1050 res[15:0] = val1[15:0] + val2[31:16]
1051 res[31:16] = val1[31:16] - val2[15:0]
1054 uint32_t __QSAX(uint32_t val1, uint32_t val2);
1057 /**************************************************************************************************/
1059 \brief Dual 16-bit signed subtraction and addition with halved results
1061 \details This function enables you to exchange the two halfwords of one operand, perform one
1062 signed 16-bit integer subtraction and one signed 16-bit addition, and halve the results.
1064 \param val1 first 16-bit operands.
1065 \param val2 second 16-bit operands.
1068 \li the halved addition of the low halfword in the first operand and the high halfword
1069 in the second operand, in the low halfword of the return value.
1070 \li the halved subtraction of the low halfword in the second operand from the high
1071 halfword in the first operand, in the high halfword of the return value.
1075 res[15:0] = (val1[15:0] + val2[31:16]) >> 1
1076 res[31:16] = (val1[31:16] - val2[15:0] ) >> 1
1079 uint32_t __SHSAX(uint32_t val1, uint32_t val2);
1082 /**************************************************************************************************/
1084 \brief GE setting dual 16-bit unsigned subtract and add with exchange
1086 \details This function enables you to exchange the halfwords of the second operand, subtract the
1087 high halfwords and add the low halfwords.<br>
1088 The GE bits in the APSR are set according to the results.
1090 \param val1 first operand for the addition in the low halfword, and the first
1091 operand for the subtraction in the high halfword.
1092 \param val2 second operand for the addition in the high halfword, and the
1093 second operand for the subtraction in the low halfword.
1096 \li the addition of the low halfword in the first operand and the high halfword in the
1097 second operand, in the low halfword of the return value.
1098 \li the subtraction of the low halfword in the second operand from the high halfword
1099 in the first operand, in the high halfword of the return value.
1101 Each bit in APSR.GE is set or cleared for each byte in the return value, depending on
1102 the results of the operation.
1104 If \em res is the return value, then:
1105 \li if res[15:0] \>= 0x10000 then APSR.GE[1:0] = 11 else 00
1106 \li if res[31:16] \>= 0 then APSR.GE[3:2] = 11 else 00
1110 res[15:0] = val1[15:0] + val2[31:16]
1111 res[31:16] = val1[31:16] - val2[15:0]
1114 uint32_t __USAX(uint32_t val1, uint32_t val2);
1117 /**************************************************************************************************/
1119 \brief Dual 16-bit unsigned saturating subtraction and addition with exchange
1121 \details This function enables you to exchange the halfwords of the second operand and perform
1122 one unsigned 16-bit integer subtraction and one unsigned 16-bit addition, saturating the
1123 results to the 16-bit unsigned integer range 0 \<= x \<= 2<sup>16</sup> - 1.
1125 \param val1 first 16-bit operand for the addition in the low halfword, and the
1126 first 16-bit operand for the subtraction in the high halfword.
1127 \param val2 second 16-bit halfword for the addition in the high halfword,
1128 and the second 16-bit halfword for the subtraction in the low halfword.
1131 \li the addition of the low halfword in the first operand and the high halfword in the
1132 second operand, in the low halfword of the return value.
1133 \li the subtraction of the low halfword in the second operand from the high halfword
1134 in the first operand, in the high halfword of the return value.
1136 The results are saturated to the 16-bit unsigned integer
1137 range 0 \<= x \<= 2<sup>16</sup> - 1.
1141 res[15:0] = val1[15:0] + val2[31:16]
1142 res[31:16] = val1[31:16] - val2[15:0]
1145 uint32_t __UQSAX(uint32_t val1, uint32_t val2);
1148 /**************************************************************************************************/
1150 \brief Dual 16-bit unsigned subtraction and addition with halved results and exchange
1152 \details This function enables you to exchange the halfwords of the second operand, subtract the
1153 high halfwords and add the low halfwords, halving the results.
1155 \param val1 first operand for the addition in the low halfword, and the first
1156 operand for the subtraction in the high halfword.
1157 \param val2 second operand for the addition in the high halfword, and the
1158 second operand for the subtraction in the low halfword.
1161 \li the halved addition of the high halfword in the second operand and the low
1162 halfword in the first operand, in the low halfword of the return value.
1163 \li the halved subtraction of the low halfword in the second operand from the high
1164 halfword in the first operand, in the high halfword of the return value.
1168 res[15:0] = (val1[15:0] + val2[31:16]) >> 1
1169 res[31:16] = (val1[31:16] - val2[15:0] ) >> 1
1172 uint32_t __UHSAX(uint32_t val1, uint32_t val2);
1175 /**************************************************************************************************/
1177 \brief Unsigned sum of quad 8-bit unsigned absolute difference
1179 \details This function enables you to perform four unsigned 8-bit subtractions, and add the
1180 absolute values of the differences together, returning the result as a single unsigned
1183 \param val1 first four 8-bit operands for the subtractions.
1184 \param val2 second four 8-bit operands for the subtractions.
1187 \li the subtraction of the first byte in the second operand from the first byte in the
1189 \li the subtraction of the second byte in the second operand from the second byte in
1191 \li the subtraction of the third byte in the second operand from the third byte in the
1193 \li the subtraction of the fourth byte in the second operand from the fourth byte in
1196 The sum is returned as a single unsigned integer.
1201 absdiff1 = abs(val1[7:0] - val2[7:0])
1202 absdiff2 = abs(val1[15:8] - val2[15:8])
1203 absdiff3 = abs(val1[23:16] - val2[23:16])
1204 absdiff4 = abs(val1[31:24] - val2[31:24])
1205 res[31:0] = absdiff1 + absdiff2 + absdiff3 + absdiff4
1208 uint32_t __USAD8(uint32_t val1, uint32_t val2);
1211 /**************************************************************************************************/
1213 \brief Unsigned sum of quad 8-bit unsigned absolute difference with 32-bit accumulate
1215 \details This function enables you to perform four unsigned 8-bit subtractions, and add the
1216 absolute values of the differences to a 32-bit accumulate operand.
1218 \param val1 first four 8-bit operands for the subtractions.
1219 \param val2 second four 8-bit operands for the subtractions.
1220 \param val3 accumulation value.
1223 the sum of the absolute differences of the following
1224 bytes, added to the accumulation value:
1225 \li the subtraction of the first byte in the second operand from the first byte in the
1227 \li the subtraction of the second byte in the second operand from the second byte in
1229 \li the subtraction of the third byte in the second operand from the third byte in the
1231 \li the subtraction of the fourth byte in the second operand from the fourth byte in
1237 absdiff1 = abs(val1[7:0] - val2[7:0])
1238 absdiff2 = abs(val1[15:8] - val2[15:8])
1239 absdiff3 = abs(val1[23:16] - val2[23:16])
1240 absdiff4 = abs(val1[31:24] - val2[31:24])
1241 sum = absdiff1 + absdiff2 + absdiff3 + absdiff4
1242 res[31:0] = sum[31:0] + val3[31:0]
1245 uint32_t __USADA8(uint32_t val1, uint32_t val2, uint32_t val3);
1248 /**************************************************************************************************/
1250 \brief Q setting dual 16-bit saturate
1252 \details This function enables you to saturate two signed 16-bit values to a selected signed range.<br>
1253 The Q bit is set if either operation saturates.
1255 \param val1 two signed 16-bit values to be saturated.
1256 \param val2 bit position for saturation, an integral constant expression in the
1261 the saturation of the two signed 16-bit values, each limited to the
1262 signed range selected by \em val2:
1263 \li the signed saturation of the low halfword in \em val1, saturated to the bit position
1264 specified in \em val2 and returned in the low halfword of the return value.
1265 \li the signed saturation of the high halfword in <i>val1</i>, saturated to the bit position
1266 specified in <i>val2</i> and returned in the high halfword of the return value.
1271 Saturate halfwords in val1 to the signed range specified by the bit position in val2
1274 uint32_t __SSAT16(uint32_t val1, const uint32_t val2);
1277 /**************************************************************************************************/
1279 \brief Q setting dual 16-bit unsigned saturate
1281 \details This function enables you to saturate two signed 16-bit values to a selected unsigned
1283 The Q bit is set if either operation saturates.
1285 \param val1 two 16-bit values that are to be saturated.
1286 \param val2 bit position for saturation, and must be an integral constant
1287 expression in the range 0 to 15.
1291 the saturation of the two signed 16-bit values, as non-negative values.
1292 \li the saturation of the low halfword in \em val1, saturated to the bit position
1293 specified in \em val2 and returned in the low halfword of the return value.
1294 \li the saturation of the high halfword in \em val1, saturated to the bit position
1295 specified in \em val2 and returned in the high halfword of the return value.
1300 Saturate halfwords in val1 to the unsigned range specified by the bit position in val2
1303 uint32_t __USAT16(uint32_t val1, const uint32_t val2);
1306 /**************************************************************************************************/
1308 \brief Dual extract 8-bits and zero-extend to 16-bits
1310 \details This function enables you to extract two 8-bit values from an operand and zero-extend
1311 them to 16 bits each.
1313 \param val two 8-bit values in val[7:0] and val[23:16] to be zero-extended.
1317 the 8-bit values zero-extended to 16-bit values.
1318 \li zero-extended value of val[7:0] in the low halfword of the return value.
1319 \li zero-extended value of val[23:16] in the high halfword of the return value.
1324 res[15:0] = ZeroExtended(val[7:0] )
1325 res[31:16] = ZeroExtended(val[23:16])
1328 uint32_t __UXTB16(uint32_t val);
1331 /**************************************************************************************************/
1333 \brief Dual extracted 8-bit to 16-bit unsigned addition
1335 \details This function enables you to extract two 8-bit values from one operand, zero-extend them
1336 to 16 bits each, and add the results to two 16-bit values from another operand.
1338 \param val1 two 16-bit values added to the zero-extended 8-bit values.
1339 \param val2 two 8-bit values to be extracted and zero-extended.
1343 the 8-bit values in \em val2, zero-extended to 16-bit values
1344 and added to \em val1.
1349 res[15:0] = ZeroExt(val2[7:0] to 16 bits) + val1[15:0]
1350 res[31:16] = ZeroExt(val2[23:16] to 16 bits) + val1[31:16]
1353 uint32_t __UXTAB16(uint32_t val1, uint32_t val2);
1356 /**************************************************************************************************/
1358 \brief Dual extract 8-bits and sign extend each to 16-bits
1360 \details This function enables you to extract two 8-bit values from an operand and sign-extend
1361 them to 16 bits each.
1363 \param val two 8-bit values in val[7:0] and val[23:16] to be sign-extended.
1368 the 8-bit values sign-extended to 16-bit values.
1369 \li sign-extended value of val[7:0] in the low halfword of the return value.
1370 \li sign-extended value of val[23:16] in the high halfword of the return value.
1375 res[15:0] = SignExtended(val[7:0])
1376 res[31:16] = SignExtended(val[23:16])
1379 uint32_t __SXTB16(uint32_t val);
1382 /**************************************************************************************************/
1384 \brief Rotate right, dual extract 8-bits and sign extend each to 16-bits
1386 \details This function enables you to rotate an operand by 8, 16, or 24 bits, extract two 8-bit values and sign-extend
1387 them to 16 bits each.
1389 \param val two 8-bit values in val[7:0] and val[23:16] to be sign-extended.
1390 \param rotate number of bits to rotate val. Constant rotate value of 8, 16 and 24 can be
1391 optimally used with a single __SXTB16 instruction. Any other valid constant rotate
1392 value will result in use of two instructions, __ROR and __SXTB16
1396 the 8-bit values sign-extended to 16-bit values.
1397 \li sign-extended value of val[7:0] in the low halfword of the return value.
1398 \li sign-extended value of val[23:16] in the high halfword of the return value.
1403 val = Rotate(val, rotate)
1404 res[15:0] = SignExtended(val[7:0])
1405 res[31:16] = SignExtended(val[23:16])
1408 uint32_t __SXTB16_RORn(uint32_t val, uint32_t rotate);
1411 /**************************************************************************************************/
1413 \brief Dual extracted 8-bit to 16-bit signed addition
1415 \details This function enables you to extract two 8-bit values from the second operand (at bit
1416 positions [7:0] and [23:16]), sign-extend them to 16-bits each, and add the results to the
1419 \param val1 two 16-bit values added to the sign-extended 8-bit values.
1420 \param val2 two 8-bit values to be extracted and sign-extended.
1425 the addition of \em val1 and \em val2, where the 8-bit values in
1426 val2[7:0] and val2[23:16] have been extracted and sign-extended prior to the addition.
1431 res[15:0] = val1[15:0] + SignExtended(val2[7:0])
1432 res[31:16] = val1[31:16] + SignExtended(val2[23:16])
1435 uint32_t __SXTAB16(uint32_t val1, uint32_t val2);
1438 /**************************************************************************************************/
1440 \brief Rotate right, followed by sign extension of two 8-bits with add to 16-bits
1442 \details This function enables you to rotate the second operand by 8, 16, or 24 bits as specified by the third
1443 operand, extract two 8-bit values from the rotated result (at bit positions [7:0] and [23:16]),
1444 sign-extend them to 16-bits each, and add the results to the first operand.
1446 \param val1 two 16-bit values in val1[15:0] and val1[31:16]
1447 \param val2 two 8-bit values in val2[7:0] and val2[23:16] to be sign-extended post rotation
1448 \param rotate number of bits to rotate val2. Constant rotate value of 8, 16 and 24 can be
1449 optimally used with a single __SXTAB16 instruction. Any other valid constant rotate
1450 value will result in use of two instructions, __ROR and __SXTAB16
1454 the addition of \em val1 and \em val2, where the rotated 8-bit values in
1455 val2[7:0] and val2[23:16] have been extracted and sign-extended prior to the addition.
1460 val2 = Rotate(val2, rotate)
1461 res[15:0] = val1[15:0] + SignExtended(val2[7:0])
1462 res[31:16] = val1[31:16] + SignExtended(val2[23:16])
1465 uint32_t __SXTAB16_RORn(uint32_t val1, uint32_t val2, uint32_t rotate);
1468 /**************************************************************************************************/
1470 \brief Q setting sum of dual 16-bit signed multiply
1472 \details This function enables you to perform two 16-bit signed multiplications, adding the
1473 products together.<br>
1474 The Q bit is set if the addition overflows.
1476 \param val1 first 16-bit operands for each multiplication.
1477 \param val2 second 16-bit operands for each multiplication.
1482 the sum of the products of the two 16-bit signed multiplications.
1487 p1 = val1[15:0] * val2[15:0]
1488 p2 = val1[31:16] * val2[31:16]
1492 uint32_t __SMUAD(uint32_t val1, uint32_t val2);
1495 /**************************************************************************************************/
1497 \brief Q setting sum of dual 16-bit signed multiply with exchange
1499 \details This function enables you to perform two 16-bit signed multiplications with exchanged
1500 halfwords of the second operand, adding the products together.<br>
1501 The Q bit is set if the addition overflows.
1503 \param val1 first 16-bit operands for each multiplication.
1504 \param val2 second 16-bit operands for each multiplication.
1509 the sum of the products of the two 16-bit signed multiplications with exchanged
1510 halfwords of the second operand.
1515 p1 = val1[15:0] * val2[31:16]
1516 p2 = val1[31:16] * val2[15:0]
1520 uint32_t __SMUADX(uint32_t val1, uint32_t val2);
1523 /**************************************************************************************************/
1525 \brief 32-bit signed multiply with 32-bit truncated accumulator.
1527 \details This function enables you to perform a signed 32-bit multiplication, adding the most significant 32 bits
1528 of the 64-bit result to a 32-bit accumulate operand.<br>
1530 \param val1 first operand for multiplication.
1531 \param val2 second operand for multiplication.
1532 \param val3 accumulate value.
1535 \returns the most significant 32 bits of the product, added to the accumulate
1536 value, as a 32-bit integer.
1541 res[31:0] = p[63:32] + val3[31:0]
1544 uint32_t __SMMLA (int32_t val1, int32_t val2, int32_t val3);
1547 /**************************************************************************************************/
1549 \brief Q setting dual 16-bit signed multiply with single 32-bit accumulator
1551 \details This function enables you to perform two signed 16-bit multiplications, adding both
1552 results to a 32-bit accumulate operand.<br>
1553 The Q bit is set if the addition overflows. Overflow cannot occur during the multiplications.
1555 \param val1 first 16-bit operands for each multiplication.
1556 \param val2 second 16-bit operands for each multiplication.
1557 \param val3 accumulate value.
1561 the product of each multiplication added to the accumulate
1562 value, as a 32-bit integer.
1567 p1 = val1[15:0] * val2[15:0]
1568 p2 = val1[31:16] * val2[31:16]
1569 res[31:0] = p1 + p2 + val3[31:0]
1572 uint32_t __SMLAD(uint32_t val1, uint32_t val2, uint32_t val3);
1575 /**************************************************************************************************/
1577 \brief Q setting pre-exchanged dual 16-bit signed multiply with single 32-bit accumulator
1579 \details This function enables you to perform two signed 16-bit multiplications with exchanged
1580 halfwords of the second operand, adding both results to a 32-bit accumulate operand.<br>
1581 The Q bit is set if the addition overflows. Overflow cannot occur during the multiplications.
1583 \param val1 first 16-bit operands for each multiplication.
1584 \param val2 second 16-bit operands for each multiplication.
1585 \param val3 accumulate value.
1589 the product of each multiplication with exchanged
1590 halfwords of the second operand added to the accumulate value, as a 32-bit integer.
1595 p1 = val1[15:0] * val2[31:16]
1596 p2 = val1[31:16] * val2[15:0]
1597 res[31:0] = p1 + p2 + val3[31:0]
1600 uint32_t __SMLADX(uint32_t val1, uint32_t val2, uint32_t val3);
1603 /**************************************************************************************************/
1605 \brief Dual 16-bit signed multiply with single 64-bit accumulator
1607 \details This function enables you to perform two signed 16-bit multiplications, adding both
1608 results to a 64-bit accumulate operand. Overflow is only possible as a result of the 64-bit
1609 addition. This overflow is not detected if it occurs. Instead, the result wraps around
1610 modulo 2<sup>64</sup>.
1612 \param val1 first 16-bit operands for each multiplication.
1613 \param val2 second 16-bit operands for each multiplication.
1614 \param val3 accumulate value.
1618 the product of each multiplication added to the accumulate value.
1623 p1 = val1[15:0] * val2[15:0]
1624 p2 = val1[31:16] * val2[31:16]
1625 sum = p1 + p2 + val3[63:0]
1626 res[63:32] = sum[63:32]
1627 res[31:0] = sum[31:0]
1630 uint64_t __SMLALD(uint32_t val1, uint32_t val2, uint64_t val3);
1633 /**************************************************************************************************/
1635 \brief Dual 16-bit signed multiply with exchange with single 64-bit accumulator
1637 \details This function enables you to exchange the halfwords of the second operand, and perform
1638 two signed 16-bit multiplications, adding both results to a 64-bit accumulate operand.
1639 Overflow is only possible as a result of the 64-bit addition. This overflow is not detected
1640 if it occurs. Instead, the result wraps around modulo 2<sup>64</sup>.
1642 \param val1 first 16-bit operands for each multiplication.
1643 \param val2 second 16-bit operands for each multiplication.
1644 \param val3 accumulate value.
1648 the product of each multiplication added to the accumulate value.
1653 p1 = val1[15:0] * val2[31:16]
1654 p2 = val1[31:16] * val2[15:0]
1655 sum = p1 + p2 + val3[63:0]
1656 res[63:32] = sum[63:32]
1657 res[31:0] = sum[31:0]
1660 unsigned long long __SMLALDX(uint32_t val1, uint32_t val2, unsigned long long val3);
1663 /**************************************************************************************************/
1665 \brief Dual 16-bit signed multiply returning difference
1667 \details This function enables you to perform two 16-bit signed multiplications, taking the
1668 difference of the products by subtracting the high halfword product from the low
1671 \param val1 first 16-bit operands for each multiplication.
1672 \param val2 second 16-bit operands for each multiplication.
1676 the difference of the products of the two 16-bit signed multiplications.
1681 p1 = val1[15:0] * val2[15:0]
1682 p2 = val1[31:16] * val2[31:16]
1686 uint32_t __SMUSD(uint32_t val1, uint32_t val2);
1689 /**************************************************************************************************/
1691 \brief Dual 16-bit signed multiply with exchange returning difference
1693 \details This function enables you to perform two 16-bit signed multiplications, subtracting one
1694 of the products from the other. The halfwords of the second operand are exchanged
1695 before performing the arithmetic. This produces top * bottom and bottom * top
1698 \param val1 first 16-bit operands for each multiplication.
1699 \param val2 second 16-bit operands for each multiplication.
1703 the difference of the products of the two 16-bit signed multiplications.
1708 p1 = val1[15:0] * val2[31:16]
1709 p2 = val1[31:16] * val2[15:0]
1713 uint32_t __SMUSDX(uint32_t val1, uint32_t val2);
1716 /**************************************************************************************************/
1718 \brief Q setting dual 16-bit signed multiply subtract with 32-bit accumulate
1720 \details This function enables you to perform two 16-bit signed multiplications, take the
1721 difference of the products, subtracting the high halfword product from the low halfword
1722 product, and add the difference to a 32-bit accumulate operand.<br>
1723 The Q bit is set if the accumulation overflows. Overflow cannot occur during the multiplications or the
1726 \param val1 first 16-bit operands for each multiplication.
1727 \param val2 second 16-bit operands for each multiplication.
1728 \param val3 accumulate value.
1732 the difference of the product of each multiplication, added
1733 to the accumulate value.
1738 p1 = val1[15:0] * val2[15:0]
1739 p2 = val1[31:16] * val2[31:16]
1740 res[31:0] = p1 - p2 + val3[31:0]
1743 uint32_t __SMLSD(uint32_t val1, uint32_t val2, uint32_t val3);
1746 /**************************************************************************************************/
1748 \brief Q setting dual 16-bit signed multiply with exchange subtract with 32-bit accumulate
1750 \details This function enables you to exchange the halfwords in the second operand, then perform
1751 two 16-bit signed multiplications. The difference of the products is added to a 32-bit
1752 accumulate operand.<br>
1753 The Q bit is set if the addition overflows. Overflow cannot occur during the multiplications or the subtraction.
1755 \param val1 first 16-bit operands for each multiplication.
1756 \param val2 second 16-bit operands for each multiplication.
1757 \param val3 accumulate value.
1761 the difference of the product of each multiplication, added
1762 to the accumulate value.
1767 p1 = val1[15:0] * val2[31:16]
1768 p2 = val1[31:16] * val2[15:0]
1769 res[31:0] = p1 - p2 + val3[31:0]
1772 uint32_t __SMLSDX(uint32_t val1, uint32_t val2, uint32_t val3);
1775 /**************************************************************************************************/
1777 \brief Dual 16-bit signed multiply subtract with 64-bit accumulate
1779 \details This function enables you to perform two 16-bit signed multiplications, take the
1780 difference of the products, subtracting the high halfword product from the low halfword
1781 product, and add the difference to a 64-bit accumulate operand. Overflow cannot occur
1782 during the multiplications or the subtraction. Overflow can occur as a result of the 64-bit
1783 addition, and this overflow is not detected. Instead, the result wraps around
1784 modulo 2<sup>64</sup>.
1786 \param val1 first 16-bit operands for each multiplication.
1787 \param val2 second 16-bit operands for each multiplication.
1788 \param val3 accumulate value.
1792 the difference of the product of each multiplication,
1793 added to the accumulate value.
1798 p1 = val1[15:0] * val2[15:0]
1799 p2 = val1[31:16] * val2[31:16]
1800 res[63:0] = p1 - p2 + val3[63:0]
1803 uint64_t __SMLSLD(uint32_t val1, uint32_t val2, uint64_t val3);
1806 /**************************************************************************************************/
1808 \brief Dual 16-bit signed multiply with exchange subtract with 64-bit accumulate
1810 \details This function enables you to exchange the halfwords of the second operand, perform two
1811 16-bit multiplications, adding the difference of the products to a 64-bit accumulate
1812 operand. Overflow cannot occur during the multiplications or the subtraction. Overflow
1813 can occur as a result of the 64-bit addition, and this overflow is not detected. Instead,
1814 the result wraps around modulo 2<sup>64</sup>.
1816 \param val1 first 16-bit operands for each multiplication.
1817 \param val2 second 16-bit operands for each multiplication.
1818 \param val3 accumulate value.
1822 the difference of the product of each multiplication,
1823 added to the accumulate value.
1828 p1 = val1[15:0] * val2[31:16]
1829 p2 = val1[31:16] * val2[15:0]
1830 res[63:0] = p1 - p2 + val3[63:0]
1833 unsigned long long __SMLSLDX(uint32_t val1, uint32_t val2, unsigned long long val3);
1836 /**************************************************************************************************/
1838 \brief Select bytes based on GE bits
1840 \details This function inserts a SEL instruction into the instruction stream generated by the
1841 compiler. It enables you to select bytes from the input parameters, whereby the bytes
1842 that are selected depend upon the results of a previous SIMD instruction function. The
1843 results of the previous SIMD instruction function are represented by the Greater than or
1844 Equal flags in the Application Program Status Register (APSR).
1845 The __SEL function works equally well on both halfword and byte operand function
1846 results. This is because halfword operand operations set two (duplicate) GE bits per
1849 \param val1 four selectable 8-bit values.
1850 \param val2 four selectable 8-bit values.
1854 The function selects bytes from the input parameters and returns them in the
1855 return value, res, according to the following criteria:
1856 \li if APSR.GE[0] == 1 then res[7:0] = val1[7:0] else res[7:0] = val2[7:0]
1857 \li if APSR.GE[1] == 1 then res[15:8] = val1[15:8] else res[15:8] = val2[15:8]
1858 \li if APSR.GE[2] == 1 then res[23:16] = val1[23:16] else res[23:16] = val2[23:16]
1859 \li if APSR.GE[3] == 1 then res[31:24] = val1[31:24] else res[31:24] = val2[31:24]
1862 uint32_t __SEL(uint32_t val1, uint32_t val2);
1865 /**************************************************************************************************/
1867 \brief Q setting saturating add
1869 \details This function enables you to obtain the saturating add of two integers.<br>
1870 The Q bit is set if the operation saturates.
1872 \param val1 first summand of the saturating add operation.
1873 \param val2 second summand of the saturating add operation.
1877 the saturating addition of val1 and val2.
1881 res[31:0] = SAT(val1 + val2)
1884 uint32_t __QADD(uint32_t val1, uint32_t val2);
1887 /**************************************************************************************************/
1889 \brief Q setting saturating subtract
1891 \details This function enables you to obtain the saturating subtraction of two integers.<br>
1892 The Q bit is set if the operation saturates.
1894 \param val1 minuend of the saturating subtraction operation.
1895 \param val2 subtrahend of the saturating subtraction operation.
1899 the saturating subtraction of val1 and val2.
1903 res[31:0] = SAT(val1 - val2)
1906 uint32_t __QSUB(uint32_t val1, uint32_t val2);
1909 /**************************************************************************************************/
1911 \brief Halfword packing instruction. Combines bits[15:0] of <i>val1</i>
1912 with bits[31:16] of <i>val2</i> left-shifted by <i>val3</i>.
1914 \details Combine a halfword from one register with a halfword from another register.
1915 The second argument can be left-shifted before extraction of the halfword. The registers
1916 PC and SP are not allowed as arguments. This instruction does not change the flags.
1918 \param val1 first 16-bit operands
1919 \param val2 second 16-bit operands
1920 \param val3 value for left-shifting <i>val2</i>. Value range [0..31].
1924 the combination of halfwords.
1928 res[15:0] = val1[15:0]
1929 res[31:16] = (val2 << val3)[31:16]
1932 uint32_t __PKHBT(uint32_t val1, uint32_t val2, uint32_t val3);
1935 /**************************************************************************************************/
1937 \brief Halfword packing instruction. Combines bits[31:16] of <i>val1</i>
1938 with bits[15:0] of <i>val2</i> right-shifted by <i>val3</i>.
1940 \details Combines a halfword from one register with a halfword from another register.
1941 The second argument can be right-shifted before extraction of the halfword. The registers
1942 PC and SP are not allowed as arguments. This instruction does not change the flags.
1944 \param val1 second 16-bit operands
1945 \param val2 first 16-bit operands
1946 \param val3 value for right-shifting <i>val2</i>. Value range [1..32].
1950 the combination of halfwords.
1954 res[15:0] = (val2 >> val3)[15:0]
1955 res[31:16] = val1[31:16]
1958 uint32_t __PKHTB(uint32_t val1, uint32_t val2, uint32_t val3);
1960 /** @} */ /* end group intrinsic_SIMD_gr */