3 * Copyright (C) 2006-2014 wolfSSL Inc.
5 * This file is part of CyaSSL.
7 * CyaSSL is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
12 * CyaSSL is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
23 /* See Intel(R) Advanced Encryption Standard (AES) Instructions Set White Paper
24 * by Intel Mobility Group, Israel Development Center, Israel Shay Gueron
27 /* This file is in AT&T asm syntax; see the .asm file for Intel syntax. */
31 AES_CBC_encrypt (const unsigned char *in,
33 unsigned char ivec[16],
35 const unsigned char *KS,
38 .globl AES_CBC_encrypt
/* AES_CBC_encrypt is exported by the .globl directive above. Only one
 * instruction of its body is visible in this excerpt: the final AES
 * encryption round. In AT&T operand order (source, destination),
 * aesenclast applies the round key held in %xmm2 to the state held in
 * %xmm1 and leaves the result in %xmm1.
 * NOTE(review): how %xmm1 and %xmm2 are loaded, the chaining with ivec,
 * the loop and the return are not visible here - confirm against the
 * full routine. The prototype above is only partially visible as well. */
82 aesenclast %xmm2,%xmm1
91 AES_CBC_decrypt (const unsigned char *in,
93 unsigned char ivec[16],
95 const unsigned char *KS,
98 .globl AES_CBC_decrypt
/* Visible fragments of AES_CBC_decrypt. Two code paths can be recognised:
 * one that works on four 16-byte blocks at once in %xmm1-%xmm4, and one
 * that works on a single block in %xmm1. Labels, branches, the load of the
 * first block, the pxor/aesdec steps of the four-block path, the ivec
 * handling and the return are not visible in this excerpt.
 * NOTE(review): register roles are assumed to follow the System V AMD64
 * convention (%rdi = in, %rsi = out, %r8 = KS) - confirm.
 * Alignment: movdqa and the memory-operand form of aesdec raise #GP when
 * the address is not 16-byte aligned, so the key schedule at %r8 must be
 * 16-byte aligned. The data pointers use movdqu and may be unaligned. */

/* Four-block path: load the input blocks at offsets 16, 32 and 48. */
122 movdqu 16(%rdi), %xmm2
123 movdqu 32(%rdi), %xmm3
124 movdqu 48(%rdi), %xmm4
/* Round keys at schedule offsets 16-48. */
130 movdqa 16(%r8), %xmm10
131 movdqa 32(%r8), %xmm11
132 movdqa 48(%r8), %xmm12
/* Round keys at offsets 64-112; %xmm9-%xmm12 are overwritten per batch. */
150 movdqa 64(%r8), %xmm9
151 movdqa 80(%r8), %xmm10
152 movdqa 96(%r8), %xmm11
153 movdqa 112(%r8), %xmm12
/* Round keys at offsets 128-160. */
170 movdqa 128(%r8), %xmm9
171 movdqa 144(%r8), %xmm10
172 movdqa 160(%r8), %xmm11
/* Round keys at offsets 160-192.
 * NOTE(review): presumably only executed for schedules with more than 10
 * rounds; the branch that selects this is not visible. */
183 movdqa 160(%r8), %xmm9
184 movdqa 176(%r8), %xmm10
185 movdqa 192(%r8), %xmm11
/* Round keys at offsets 192-224.
 * NOTE(review): presumably the 14-round case - confirm. */
197 movdqa 192(%r8), %xmm9
198 movdqa 208(%r8), %xmm10
199 movdqa 224(%r8), %xmm11
/* Final decryption round on all four states, using whichever round key
 * was last loaded into %xmm11. */
212 aesdeclast %xmm11, %xmm1
213 aesdeclast %xmm11, %xmm2
214 aesdeclast %xmm11, %xmm3
215 aesdeclast %xmm11, %xmm4
/* Store the output blocks at offsets 16, 32 and 48 of %rsi. */
221 movdqu %xmm2, 16(%rsi)
222 movdqu %xmm3, 32(%rsi)
223 movdqu %xmm4, 48(%rsi)
/* Single-block path: %xmm2 holds the key for the final round (offset 160
 * to begin with); nine aesdec rounds read their keys straight from memory. */
235 movdqu 160(%r8), %xmm2
237 aesdec 16(%r8), %xmm1
238 aesdec 32(%r8), %xmm1
239 aesdec 48(%r8), %xmm1
240 aesdec 64(%r8), %xmm1
241 aesdec 80(%r8), %xmm1
242 aesdec 96(%r8), %xmm1
243 aesdec 112(%r8), %xmm1
244 aesdec 128(%r8), %xmm1
245 aesdec 144(%r8), %xmm1
/* Switch the final-round key to offset 192 and run two more rounds. */
247 movdqu 192(%r8), %xmm2
249 aesdec 160(%r8), %xmm1
250 aesdec 176(%r8), %xmm1
/* Switch the final-round key to offset 224 and run two more rounds. */
252 movdqu 224(%r8), %xmm2
253 aesdec 192(%r8), %xmm1
254 aesdec 208(%r8), %xmm1
/* Final round with the key currently in %xmm2. */
256 aesdeclast %xmm2, %xmm1
269 AES_ECB_encrypt (const unsigned char *in,
271 unsigned long length,
272 const unsigned char *KS,
275 .globl AES_ECB_encrypt
/* Visible fragments of AES_ECB_encrypt. Two code paths can be recognised:
 * one that works on four 16-byte blocks at once in %xmm1-%xmm4, and one
 * that works on a single block in %xmm1. Labels, branches, the load of the
 * first block, the pxor/aesenc steps of the four-block path and the return
 * are not visible in this excerpt.
 * NOTE(review): register roles are assumed to follow the System V AMD64
 * convention (%rdi = in, %rsi = out, %rcx = KS) - confirm.
 * Alignment: movdqa and the memory-operand form of aesenc raise #GP when
 * the address is not 16-byte aligned, so the key schedule at %rcx must be
 * 16-byte aligned. The data pointers use movdqu and may be unaligned. */

/* Four-block path: load the input blocks at offsets 16, 32 and 48. */
296 movdqu 16(%rdi), %xmm2
297 movdqu 32(%rdi), %xmm3
298 movdqu 48(%rdi), %xmm4
/* Round keys at schedule offsets 16-48. */
300 movdqa 16(%rcx), %xmm10
301 movdqa 32(%rcx), %xmm11
302 movdqa 48(%rcx), %xmm12
/* Round keys at offsets 64-112; %xmm9-%xmm12 are overwritten per batch. */
319 movdqa 64(%rcx), %xmm9
320 movdqa 80(%rcx), %xmm10
321 movdqa 96(%rcx), %xmm11
322 movdqa 112(%rcx), %xmm12
/* Round keys at offsets 128-160. */
339 movdqa 128(%rcx), %xmm9
340 movdqa 144(%rcx), %xmm10
341 movdqa 160(%rcx), %xmm11
/* Round keys at offsets 160-192.
 * NOTE(review): presumably only executed for schedules with more than 10
 * rounds; the branch that selects this is not visible. */
352 movdqa 160(%rcx), %xmm9
353 movdqa 176(%rcx), %xmm10
354 movdqa 192(%rcx), %xmm11
/* Round keys at offsets 192-224.
 * NOTE(review): presumably the 14-round case - confirm. */
365 movdqa 192(%rcx), %xmm9
366 movdqa 208(%rcx), %xmm10
367 movdqa 224(%rcx), %xmm11
/* Final encryption round on all four states, using whichever round key
 * was last loaded into %xmm11. */
380 aesenclast %xmm11, %xmm1
381 aesenclast %xmm11, %xmm2
382 aesenclast %xmm11, %xmm3
383 aesenclast %xmm11, %xmm4
/* Store the output blocks at offsets 16, 32 and 48 of %rsi. */
385 movdqu %xmm2, 16(%rsi)
386 movdqu %xmm3, 32(%rsi)
387 movdqu %xmm4, 48(%rsi)
/* Single-block path: %xmm2 holds the key for the final round (offset 160
 * to begin with); nine aesenc rounds read their keys straight from memory. */
397 movdqu 160(%rcx), %xmm2
398 aesenc 16(%rcx), %xmm1
399 aesenc 32(%rcx), %xmm1
400 aesenc 48(%rcx), %xmm1
401 aesenc 64(%rcx), %xmm1
402 aesenc 80(%rcx), %xmm1
403 aesenc 96(%rcx), %xmm1
404 aesenc 112(%rcx), %xmm1
405 aesenc 128(%rcx), %xmm1
406 aesenc 144(%rcx), %xmm1
/* Switch the final-round key to offset 192 and run two more rounds. */
409 movdqu 192(%rcx), %xmm2
410 aesenc 160(%rcx), %xmm1
411 aesenc 176(%rcx), %xmm1
/* Switch the final-round key to offset 224 and run two more rounds. */
414 movdqu 224(%rcx), %xmm2
415 aesenc 192(%rcx), %xmm1
416 aesenc 208(%rcx), %xmm1
/* Final round with the key currently in %xmm2. */
418 aesenclast %xmm2, %xmm1
428 AES_ECB_decrypt (const unsigned char *in,
430 unsigned long length,
431 const unsigned char *KS,
434 .globl AES_ECB_decrypt
/* Visible fragments of AES_ECB_decrypt. Two code paths can be recognised:
 * one that works on four 16-byte blocks at once in %xmm1-%xmm4, and one
 * that works on a single block in %xmm1. Labels, branches, the load of the
 * first block, the pxor/aesdec steps of the four-block path and the return
 * are not visible in this excerpt.
 * NOTE(review): register roles are assumed to follow the System V AMD64
 * convention (%rdi = in, %rsi = out, %rcx = KS) - confirm.
 * Alignment: movdqa and the memory-operand form of aesdec raise #GP when
 * the address is not 16-byte aligned, so the key schedule at %rcx must be
 * 16-byte aligned. The data pointers use movdqu and may be unaligned. */

/* Four-block path: load the input blocks at offsets 16, 32 and 48. */
456 movdqu 16(%rdi), %xmm2
457 movdqu 32(%rdi), %xmm3
458 movdqu 48(%rdi), %xmm4
/* Round keys at schedule offsets 16-48. */
460 movdqa 16(%rcx), %xmm10
461 movdqa 32(%rcx), %xmm11
462 movdqa 48(%rcx), %xmm12
/* Round keys at offsets 64-112; %xmm9-%xmm12 are overwritten per batch. */
479 movdqa 64(%rcx), %xmm9
480 movdqa 80(%rcx), %xmm10
481 movdqa 96(%rcx), %xmm11
482 movdqa 112(%rcx), %xmm12
/* Round keys at offsets 128-160. */
499 movdqa 128(%rcx), %xmm9
500 movdqa 144(%rcx), %xmm10
501 movdqa 160(%rcx), %xmm11
/* Round keys at offsets 160-192.
 * NOTE(review): presumably only executed for schedules with more than 10
 * rounds; the branch that selects this is not visible. */
512 movdqa 160(%rcx), %xmm9
513 movdqa 176(%rcx), %xmm10
514 movdqa 192(%rcx), %xmm11
/* Round keys at offsets 192-224.
 * NOTE(review): presumably the 14-round case - confirm. */
525 movdqa 192(%rcx), %xmm9
526 movdqa 208(%rcx), %xmm10
527 movdqa 224(%rcx), %xmm11
/* Final decryption round on all four states, using whichever round key
 * was last loaded into %xmm11. */
540 aesdeclast %xmm11, %xmm1
541 aesdeclast %xmm11, %xmm2
542 aesdeclast %xmm11, %xmm3
543 aesdeclast %xmm11, %xmm4
/* Store the output blocks at offsets 16, 32 and 48 of %rsi. */
545 movdqu %xmm2, 16(%rsi)
546 movdqu %xmm3, 32(%rsi)
547 movdqu %xmm4, 48(%rsi)
/* Single-block path: %xmm2 holds the key for the final round (offset 160
 * to begin with); nine aesdec rounds read their keys straight from memory. */
557 movdqu 160(%rcx), %xmm2
559 aesdec 16(%rcx), %xmm1
560 aesdec 32(%rcx), %xmm1
561 aesdec 48(%rcx), %xmm1
562 aesdec 64(%rcx), %xmm1
563 aesdec 80(%rcx), %xmm1
564 aesdec 96(%rcx), %xmm1
565 aesdec 112(%rcx), %xmm1
566 aesdec 128(%rcx), %xmm1
567 aesdec 144(%rcx), %xmm1
/* Switch the final-round key to offset 192 and run two more rounds. */
570 movdqu 192(%rcx), %xmm2
571 aesdec 160(%rcx), %xmm1
572 aesdec 176(%rcx), %xmm1
/* Switch the final-round key to offset 224 and run two more rounds. */
574 movdqu 224(%rcx), %xmm2
575 aesdec 192(%rcx), %xmm1
576 aesdec 208(%rcx), %xmm1
/* Final round with the key currently in %xmm2. */
578 aesdeclast %xmm2, %xmm1
590 void AES_128_Key_Expansion(const unsigned char* userkey,
591 unsigned char* key_schedule);
594 .globl AES_128_Key_Expansion
595 AES_128_Key_Expansion:
/* Visible part of the AES-128 key expansion: ten identical steps. Each
 * step runs aeskeygenassist on the current round key in %xmm1 (result in
 * %xmm2), calls PREPARE_ROUNDKEY_128, and stores %xmm1 as the next 16-byte
 * round key at offsets 16, 32, ..., 160 of %rsi.
 * The immediates 1, 2, 4, 8, 16, 32, 64, 0x80, 0x1b, 0x36 are the AES round
 * constants (Rcon) for rounds 1-10.
 * movdqa is an aligned store, so %rsi must be 16-byte aligned, and the
 * buffer must hold at least 176 bytes (the last store covers 160..175).
 * NOTE(review): %rsi is presumably key_schedule (second argument under the
 * System V AMD64 convention) - confirm. The initial load of the user key,
 * the store of round key 0 and the return are not visible in this excerpt;
 * PREPARE_ROUNDKEY_128 is presumed to fold %xmm2 into %xmm1. */
605 aeskeygenassist $1, %xmm1, %xmm2
606 call PREPARE_ROUNDKEY_128
607 movdqa %xmm1, 16(%rsi)
608 aeskeygenassist $2, %xmm1, %xmm2
609 call PREPARE_ROUNDKEY_128
610 movdqa %xmm1, 32(%rsi)
611 aeskeygenassist $4, %xmm1, %xmm2
612 call PREPARE_ROUNDKEY_128
613 movdqa %xmm1, 48(%rsi)
614 aeskeygenassist $8, %xmm1, %xmm2
615 call PREPARE_ROUNDKEY_128
616 movdqa %xmm1, 64(%rsi)
617 aeskeygenassist $16, %xmm1, %xmm2
618 call PREPARE_ROUNDKEY_128
619 movdqa %xmm1, 80(%rsi)
620 aeskeygenassist $32, %xmm1, %xmm2
621 call PREPARE_ROUNDKEY_128
622 movdqa %xmm1, 96(%rsi)
623 aeskeygenassist $64, %xmm1, %xmm2
624 call PREPARE_ROUNDKEY_128
625 movdqa %xmm1, 112(%rsi)
626 aeskeygenassist $0x80, %xmm1, %xmm2
627 call PREPARE_ROUNDKEY_128
628 movdqa %xmm1, 128(%rsi)
629 aeskeygenassist $0x1b, %xmm1, %xmm2
630 call PREPARE_ROUNDKEY_128
631 movdqa %xmm1, 144(%rsi)
632 aeskeygenassist $0x36, %xmm1, %xmm2
633 call PREPARE_ROUNDKEY_128
/* Last round key (round 10) at offset 160. */
634 movdqa %xmm1, 160(%rsi)
637 PREPARE_ROUNDKEY_128:
/* Helper called after each aeskeygenassist in AES_128_Key_Expansion.
 * Selector 255 (0b11111111) copies dword 3 of %xmm2 into all four dwords
 * of %xmm2. The remaining instructions of this helper, including its
 * return, are not visible in this excerpt. */
638 pshufd $255, %xmm2, %xmm2
651 void AES_192_Key_Expansion (const unsigned char *userkey,
654 .globl AES_192_Key_Expansion
655 AES_192_Key_Expansion:
/* Visible part of the AES-192 key expansion. Eight steps each run
 * aeskeygenassist on %xmm3 (result in %xmm2) and call PREPARE_ROUNDKEY_192.
 * Because a 192-bit key yields 24 bytes of key material per step while
 * round keys are 16 bytes, shufpd is used on alternate steps to repack
 * 64-bit halves of %xmm1/%xmm3 into %xmm5/%xmm6 before storing.
 * The immediates 0x1 ... 0x80 are the AES round constants (Rcon).
 * movdqa is an aligned store, so %rsi must be 16-byte aligned; the last
 * store covers offsets 208..223, so the buffer must hold at least 224 bytes.
 * NOTE(review): %rdi/%rsi are presumably userkey/key schedule (System V
 * AMD64 argument order) - confirm. The load of key bytes 0..15, the store
 * of round key 0, the copies that seed %xmm5/%xmm6 and the return are not
 * visible in this excerpt. */

/* Load key bytes 16..23 only. A 192-bit key is 24 bytes long, so a 16-byte
 * load from offset 16 would read 8 bytes past the end of the caller's
 * buffer. movq reads exactly 8 bytes and zeroes the upper half of %xmm3.
 * The visible consumers of %xmm3 use only its low 64 bits (aeskeygenassist
 * followed by the dword-1 broadcast in PREPARE_ROUNDKEY_192, and
 * shufpd $1), apart from the store at offset 208, which lies beyond the 13
 * round keys (offsets 0..207).
 * NOTE(review): confirm that no instruction outside this excerpt feeds the
 * upper 64 bits of %xmm3 into round keys 0..12. */
660 movq 16(%rdi), %xmm3
664 aeskeygenassist $0x1, %xmm3, %xmm2
665 call PREPARE_ROUNDKEY_192
/* %xmm5 = { low half of %xmm5, low half of %xmm1 } */
666 shufpd $0, %xmm1, %xmm5
667 movdqa %xmm5, 16(%rsi)
/* %xmm6 = { high half of %xmm6, low half of %xmm3 } */
669 shufpd $1, %xmm3, %xmm6
670 movdqa %xmm6, 32(%rsi)
672 aeskeygenassist $0x2, %xmm3, %xmm2
673 call PREPARE_ROUNDKEY_192
674 movdqa %xmm1, 48(%rsi)
677 aeskeygenassist $0x4, %xmm3, %xmm2
678 call PREPARE_ROUNDKEY_192
679 shufpd $0, %xmm1, %xmm5
680 movdqa %xmm5, 64(%rsi)
682 shufpd $1, %xmm3, %xmm6
683 movdqa %xmm6, 80(%rsi)
685 aeskeygenassist $0x8, %xmm3, %xmm2
686 call PREPARE_ROUNDKEY_192
687 movdqa %xmm1, 96(%rsi)
690 aeskeygenassist $0x10, %xmm3, %xmm2
691 call PREPARE_ROUNDKEY_192
692 shufpd $0, %xmm1, %xmm5
693 movdqa %xmm5, 112(%rsi)
695 shufpd $1, %xmm3, %xmm6
696 movdqa %xmm6, 128(%rsi)
698 aeskeygenassist $0x20, %xmm3, %xmm2
699 call PREPARE_ROUNDKEY_192
700 movdqa %xmm1, 144(%rsi)
703 aeskeygenassist $0x40, %xmm3, %xmm2
704 call PREPARE_ROUNDKEY_192
705 shufpd $0, %xmm1, %xmm5
706 movdqa %xmm5, 160(%rsi)
708 shufpd $1, %xmm3, %xmm6
709 movdqa %xmm6, 176(%rsi)
711 aeskeygenassist $0x80, %xmm3, %xmm2
712 call PREPARE_ROUNDKEY_192
713 movdqa %xmm1, 192(%rsi)
/* Whole-register store of %xmm3 at 208..223; this is past round key 12,
 * so its upper 8 bytes are not part of the 13 round keys. */
714 movdqa %xmm3, 208(%rsi)
717 PREPARE_ROUNDKEY_192:
/* Helper called after each aeskeygenassist in AES_192_Key_Expansion.
 * Only two of its instructions are visible in this excerpt; the steps
 * between and after them, including the return, are not shown. */
/* Selector 0x55 copies dword 1 of %xmm2 into all four dwords of %xmm2. */
718 pshufd $0x55, %xmm2, %xmm2
/* Selector 0xff copies dword 3 of %xmm1 into all four dwords of %xmm2. */
728 pshufd $0xff, %xmm1, %xmm2
737 void AES_256_Key_Expansion (const unsigned char *userkey,
740 .globl AES_256_Key_Expansion
741 AES_256_Key_Expansion:
/* Visible part of the AES-256 key expansion. Round keys alternate between
 * %xmm1 and %xmm3: aeskeygenassist with a round constant (0x1 ... 0x40)
 * reads %xmm3 and is followed by a store of %xmm1, then aeskeygenassist
 * with constant 0x0 reads %xmm1 and is followed by a store of %xmm3.
 * movdqa is an aligned store, so %rsi must be 16-byte aligned; the last
 * store covers offsets 224..239, so the buffer must hold at least 240 bytes.
 * NOTE(review): %rdi/%rsi are presumably userkey/key schedule (System V
 * AMD64 argument order) - confirm. The load of key bytes 0..15, the store
 * of round key 0, the step between each aeskeygenassist and the following
 * store (presumably a helper call that folds %xmm2 into the key register)
 * and the return are not visible in this excerpt. */

/* Key bytes 16..31 are stored unchanged as the round key at offset 16. */
746 movdqu 16(%rdi), %xmm3
748 movdqa %xmm3, 16(%rsi)
750 aeskeygenassist $0x1, %xmm3, %xmm2
752 movdqa %xmm1, 32(%rsi)
753 aeskeygenassist $0x0, %xmm1, %xmm2
755 movdqa %xmm3, 48(%rsi)
756 aeskeygenassist $0x2, %xmm3, %xmm2
758 movdqa %xmm1, 64(%rsi)
759 aeskeygenassist $0x0, %xmm1, %xmm2
761 movdqa %xmm3, 80(%rsi)
762 aeskeygenassist $0x4, %xmm3, %xmm2
764 movdqa %xmm1, 96(%rsi)
765 aeskeygenassist $0x0, %xmm1, %xmm2
767 movdqa %xmm3, 112(%rsi)
768 aeskeygenassist $0x8, %xmm3, %xmm2
770 movdqa %xmm1, 128(%rsi)
771 aeskeygenassist $0x0, %xmm1, %xmm2
773 movdqa %xmm3, 144(%rsi)
774 aeskeygenassist $0x10, %xmm3, %xmm2
776 movdqa %xmm1, 160(%rsi)
777 aeskeygenassist $0x0, %xmm1, %xmm2
779 movdqa %xmm3, 176(%rsi)
780 aeskeygenassist $0x20, %xmm3, %xmm2
782 movdqa %xmm1, 192(%rsi)
784 aeskeygenassist $0x0, %xmm1, %xmm2
786 movdqa %xmm3, 208(%rsi)
787 aeskeygenassist $0x40, %xmm3, %xmm2
/* Last round key (offset 224). */
789 movdqa %xmm1, 224(%rsi)
/* The two instructions below sit after the store sequence; the labels
 * that enclose them are not visible in this excerpt.
 * NOTE(review): presumably the first instructions of two helper routines
 * used by the expansion above - confirm. */
/* Selector 0xff copies dword 3 of %xmm2 into all four dwords of %xmm2. */
794 pshufd $0xff, %xmm2, %xmm2
/* Selector 0xaa copies dword 2 of %xmm2 into all four dwords of %xmm2. */
806 pshufd $0xaa, %xmm2, %xmm2