.file "pax32_AES_Encryption.c"
.text
.align 2
.global main
.type main, %function
@==================================================
main:
@ args = 0, pretend = 0, frame = 8
@ frame_needed = 1, uses_anonymous_args = 0
@ ----------------------------------------------
@ Test one block of AES-128 encryption algorithm
@ on PAX-32 FPGA platform
@ ----------------------------------------------
@ ------------- Test vectors -----------------
@ source: "A Specification for Rijndael, the AES Algorithm"
@ (Rijndael was designed by Joan Daemen & Vincent Rijmen)
@ PLAINTEXT: 3243f6a8885a308d313198a2e0370734 (pi * 2^124)
@ KEY: 2b7e151628aed2a6abf7158809cf4f3c ( e * 2^124)
@ ENCRYPT
@ R[ 0].input 3243f6a8885a308d313198a2e0370734
@ R[ 0].k_sch 2b7e151628aed2a6abf7158809cf4f3c
@ R[ 1].start 193de3bea0f4e22b9ac68d2ae9f84808
@ R[ 1].s_box d42711aee0bf98f1b8b45de51e415230
@ R[ 1].s_row d4bf5d30e0b452aeb84111f11e2798e5
@ R[ 1].m_col 046681e5e0cb199a48f8d37a2806264c
@ R[ 1].k_sch a0fafe1788542cb123a339392a6c7605
@ R[ 2].start a49c7ff2689f352b6b5bea43026a5049
@ R[ 2].s_box 49ded28945db96f17f39871a7702533b
@ R[ 2].s_row 49db873b453953897f02d2f177de961a
@ R[ 2].m_col 584dcaf11b4b5aacdbe7caa81b6bb0e5
@ R[ 2].k_sch f2c295f27a96b9435935807a7359f67f
@ R[ 3].start aa8f5f0361dde3ef82d24ad26832469a
@ R[ 3].s_box ac73cf7befc111df13b5d6b545235ab8
@ R[ 3].s_row acc1d6b8efb55a7b1323cfdf457311b5
@ R[ 3].m_col 75ec0993200b633353c0cf7cbb25d0dc
@ R[ 3].k_sch 3d80477d4716fe3e1e237e446d7a883b
@ R[ 4].start 486c4eee671d9d0d4de3b138d65f58e7
@ R[ 4].s_box 52502f2885a45ed7e311c807f6cf6a94
@ R[ 4].s_row 52a4c89485116a28e3cf2fd7f6505e07
@ R[ 4].m_col 0fd6daa9603138bf6fc0106b5eb31301
@ R[ 4].k_sch ef44a541a8525b7fb671253bdb0bad00
@ R[ 5].start e0927fe8c86363c0d9b1355085b8be01
@ R[ 5].s_box e14fd29be8fbfbba35c89653976cae7c
@ R[ 5].s_row e1fb967ce8c8ae9b356cd2ba974ffb53
@ R[ 5].m_col 25d1a9adbd11d168b63a338e4c4cc0b0
@ R[ 5].k_sch d4d1c6f87c839d87caf2b8bc11f915bc
@ R[ 6].start f1006f55c1924cef7cc88b325db5d50c
@ R[ 6].s_box a163a8fc784f29df10e83d234cd503fe
@ R[ 6].s_row a14f3dfe78e803fc10d5a8df4c632923
@ R[ 6].m_col 4b868d6d2c4a8980339df4e837d218d8
@ R[ 6].k_sch 6d88a37a110b3efddbf98641ca0093fd
@ R[ 7].start 260e2e173d41b77de86472a9fdd28b25
@ R[ 7].s_box f7ab31f02783a9ff9b4340d354b53d3f
@ R[ 7].s_row f783403f27433df09bb531ff54aba9d3
@ R[ 7].m_col 1415b5bf461615ec274656d7342ad843
@ R[ 7].k_sch 4e54f70e5f5fc9f384a64fb24ea6dc4f
@ R[ 8].start 5a4142b11949dc1fa3e019657a8c040c
@ R[ 8].s_box be832cc8d43b86c00ae1d44dda64f2fe
@ R[ 8].s_row be3bd4fed4e1f2c80a642cc0da83864d
@ R[ 8].m_col 00512fd1b1c889ff54766dcdfa1b99ea
@ R[ 8].k_sch ead27321b58dbad2312bf5607f8d292f
@ R[ 9].start ea835cf00445332d655d98ad8596b0c5
@ R[ 9].s_box 87ec4a8cf26ec3d84d4c46959790e7a6
@ R[ 9].s_row 876e46a6f24ce78c4d904ad897ecc395
@ R[ 9].m_col 473794ed40d4e4a5a3703aa64c9f42bc
@ R[ 9].k_sch ac7766f319fadc2128d12941575c006e
@ R[10].start eb40f21e592e38848ba113e71bc342d2
@ R[10].s_box e9098972cb31075f3d327d94af2e2cb5
@ R[10].s_row e9317db5cb322c723d2e895faf090794
@ R[10].k_sch d014f9a8c9ee2589e13f0cc8b6630ca6
@ R[10].output 3925841d02dc09fbdc118597196a0b32
@ ------------ Load TA table into memory ------------
@ The following PAX-32 asm code, which loads the TA table
@ into a memory block, is generated by arm_aes128.c.
@ Purpose: write the 256 TA entries (0x00..0xff) to consecutive
@ offsets 0..255 from the address held in r14.
@ Each entry is built in r16 with a loadi.z.0 / loadi.k.1 pair and
@ then written with store; r16 is reused as scratch for every entry.
@ NOTE(review): presumably loadi.z.0 sets the low 16 bits (zeroing the
@ rest) and loadi.k.1 sets the high 16 bits (keeping the rest), so
@ entry 0 becomes 0x6363a5c6 -- confirm against the PAX-32 ISA manual.
@ r14 = start address of TA memory
loadi.z.0 r14, #0x0
@entry = 0x00 (0) for TA table
loadi.z.0 r16, #0xa5c6 @ first 16-bit half of entry 0
loadi.k.1 r16, #0x6363 @ second 16-bit half of entry 0
store r16, r14, #0 @ write entry 0 at offset 0 from r14
@entry = 0x01 (1) for TA table
loadi.z.0 r16, #0x84f8
loadi.k.1 r16, #0x7c7c
store r16, r14, #1
@entry = 0x02 (2) for TA table
loadi.z.0 r16, #0x99ee
loadi.k.1 r16, #0x7777
store r16, r14, #2
@entry = 0x03 (3) for TA table
loadi.z.0 r16, #0x8df6
loadi.k.1 r16, #0x7b7b
store r16, r14, #3
@entry = 0x04 (4) for TA table
loadi.z.0 r16, #0x0dff
loadi.k.1 r16, #0xf2f2
store r16, r14, #4
@entry = 0x05 (5) for TA table
loadi.z.0 r16, #0xbdd6
loadi.k.1 r16, #0x6b6b
store r16, r14, #5
@entry = 0x06 (6) for TA table
loadi.z.0 r16, #0xb1de
loadi.k.1 r16, #0x6f6f
store r16, r14, #6
@entry = 0x07 (7) for TA table
loadi.z.0 r16, #0x5491
loadi.k.1 r16, #0xc5c5
store r16, r14, #7
@entry = 0x08 (8) for TA table
loadi.z.0 r16, #0x5060
loadi.k.1 r16, #0x3030
store r16, r14, #8
@entry = 0x09 (9) for TA table
loadi.z.0 r16, #0x0302
loadi.k.1 r16, #0x0101
store r16, r14, #9
@entry = 0x0a (10) for TA table
loadi.z.0 r16, #0xa9ce
loadi.k.1 r16, #0x6767
store r16, r14, #10
@entry = 0x0b (11) for TA table
loadi.z.0 r16, #0x7d56
loadi.k.1 r16, #0x2b2b
store r16, r14, #11
@entry = 0x0c (12) for TA table
loadi.z.0 r16, #0x19e7
loadi.k.1 r16, #0xfefe
store r16, r14, #12
@entry = 0x0d (13) for TA table
loadi.z.0 r16, #0x62b5
loadi.k.1 r16, #0xd7d7
store r16, r14, #13
@entry = 0x0e (14) for TA table
loadi.z.0 r16, #0xe64d
loadi.k.1 r16, #0xabab
store r16, r14, #14
@entry = 0x0f (15) for TA table
loadi.z.0 r16, #0x9aec
loadi.k.1 r16, #0x7676
store r16, r14, #15
@entry = 0x10 (16) for TA table
loadi.z.0 r16, #0x458f
loadi.k.1 r16, #0xcaca
store r16, r14, #16
@entry = 0x11 (17) for TA table
loadi.z.0 r16, #0x9d1f
loadi.k.1 r16, #0x8282
store r16, r14, #17
@entry = 0x12 (18) for TA table
loadi.z.0 r16, #0x4089
loadi.k.1 r16, #0xc9c9
store r16, r14, #18
@entry = 0x13 (19) for TA table
loadi.z.0 r16, #0x87fa
loadi.k.1 r16, #0x7d7d
store r16, r14, #19
@entry = 0x14 (20) for TA table
loadi.z.0 r16, #0x15ef
loadi.k.1 r16, #0xfafa
store r16, r14, #20
@entry = 0x15 (21) for TA table
loadi.z.0 r16, #0xebb2
loadi.k.1 r16, #0x5959
store r16, r14, #21
@entry = 0x16 (22) for TA table
loadi.z.0 r16, #0xc98e
loadi.k.1 r16, #0x4747
store r16, r14, #22
@entry = 0x17 (23) for TA table
loadi.z.0 r16, #0x0bfb
loadi.k.1 r16, #0xf0f0
store r16, r14, #23
@entry = 0x18 (24) for TA table
loadi.z.0 r16, #0xec41
loadi.k.1 r16, #0xadad
store r16, r14, #24
@entry = 0x19 (25) for TA table
loadi.z.0 r16, #0x67b3
loadi.k.1 r16, #0xd4d4
store r16, r14, #25
@entry = 0x1a (26) for TA table
loadi.z.0 r16, #0xfd5f
loadi.k.1 r16, #0xa2a2
store r16, r14, #26
@entry = 0x1b (27) for TA table
loadi.z.0 r16, #0xea45
loadi.k.1 r16, #0xafaf
store r16, r14, #27
@entry = 0x1c (28) for TA table
loadi.z.0 r16, #0xbf23
loadi.k.1 r16, #0x9c9c
store r16, r14, #28
@entry = 0x1d (29) for TA table
loadi.z.0 r16, #0xf753
loadi.k.1 r16, #0xa4a4
store r16, r14, #29
@entry = 0x1e (30) for TA table
loadi.z.0 r16, #0x96e4
loadi.k.1 r16, #0x7272
store r16, r14, #30
@entry = 0x1f (31) for TA table
loadi.z.0 r16, #0x5b9b
loadi.k.1 r16, #0xc0c0
store r16, r14, #31
@entry = 0x20 (32) for TA table
loadi.z.0 r16, #0xc275
loadi.k.1 r16, #0xb7b7
store r16, r14, #32
@entry = 0x21 (33) for TA table
loadi.z.0 r16, #0x1ce1
loadi.k.1 r16, #0xfdfd
store r16, r14, #33
@entry = 0x22 (34) for TA table
loadi.z.0 r16, #0xae3d
loadi.k.1 r16, #0x9393
store r16, r14, #34
@entry = 0x23 (35) for TA table
loadi.z.0 r16, #0x6a4c
loadi.k.1 r16, #0x2626
store r16, r14, #35
@entry = 0x24 (36) for TA table
loadi.z.0 r16, #0x5a6c
loadi.k.1 r16, #0x3636
store r16, r14, #36
@entry = 0x25 (37) for TA table
loadi.z.0 r16, #0x417e
loadi.k.1 r16, #0x3f3f
store r16, r14, #37
@entry = 0x26 (38) for TA table
loadi.z.0 r16, #0x02f5
loadi.k.1 r16, #0xf7f7
store r16, r14, #38
@entry = 0x27 (39) for TA table
loadi.z.0 r16, #0x4f83
loadi.k.1 r16, #0xcccc
store r16, r14, #39
@entry = 0x28 (40) for TA table
loadi.z.0 r16, #0x5c68
loadi.k.1 r16, #0x3434
store r16, r14, #40
@entry = 0x29 (41) for TA table
loadi.z.0 r16, #0xf451
loadi.k.1 r16, #0xa5a5
store r16, r14, #41
@entry = 0x2a (42) for TA table
loadi.z.0 r16, #0x34d1
loadi.k.1 r16, #0xe5e5
store r16, r14, #42
@entry = 0x2b (43) for TA table
loadi.z.0 r16, #0x08f9
loadi.k.1 r16, #0xf1f1
store r16, r14, #43
@entry = 0x2c (44) for TA table
loadi.z.0 r16, #0x93e2
loadi.k.1 r16, #0x7171
store r16, r14, #44
@entry = 0x2d (45) for TA table
loadi.z.0 r16, #0x73ab
loadi.k.1 r16, #0xd8d8
store r16, r14, #45
@entry = 0x2e (46) for TA table
loadi.z.0 r16, #0x5362
loadi.k.1 r16, #0x3131
store r16, r14, #46
@entry = 0x2f (47) for TA table
loadi.z.0 r16, #0x3f2a
loadi.k.1 r16, #0x1515
store r16, r14, #47
@entry = 0x30 (48) for TA table
loadi.z.0 r16, #0x0c08
loadi.k.1 r16, #0x0404
store r16, r14, #48
@entry = 0x31 (49) for TA table
loadi.z.0 r16, #0x5295
loadi.k.1 r16, #0xc7c7
store r16, r14, #49
@entry = 0x32 (50) for TA table
loadi.z.0 r16, #0x6546
loadi.k.1 r16, #0x2323
store r16, r14, #50
@entry = 0x33 (51) for TA table
loadi.z.0 r16, #0x5e9d
loadi.k.1 r16, #0xc3c3
store r16, r14, #51
@entry = 0x34 (52) for TA table
loadi.z.0 r16, #0x2830
loadi.k.1 r16, #0x1818
store r16, r14, #52
@entry = 0x35 (53) for TA table
loadi.z.0 r16, #0xa137
loadi.k.1 r16, #0x9696
store r16, r14, #53
@entry = 0x36 (54) for TA table
loadi.z.0 r16, #0x0f0a
loadi.k.1 r16, #0x0505
store r16, r14, #54
@entry = 0x37 (55) for TA table
loadi.z.0 r16, #0xb52f
loadi.k.1 r16, #0x9a9a
store r16, r14, #55
@entry = 0x38 (56) for TA table
loadi.z.0 r16, #0x090e
loadi.k.1 r16, #0x0707
store r16, r14, #56
@entry = 0x39 (57) for TA table
loadi.z.0 r16, #0x3624
loadi.k.1 r16, #0x1212
store r16, r14, #57
@entry = 0x3a (58) for TA table
loadi.z.0 r16, #0x9b1b
loadi.k.1 r16, #0x8080
store r16, r14, #58
@entry = 0x3b (59) for TA table
loadi.z.0 r16, #0x3ddf
loadi.k.1 r16, #0xe2e2
store r16, r14, #59
@entry = 0x3c (60) for TA table
loadi.z.0 r16, #0x26cd
loadi.k.1 r16, #0xebeb
store r16, r14, #60
@entry = 0x3d (61) for TA table
loadi.z.0 r16, #0x694e
loadi.k.1 r16, #0x2727
store r16, r14, #61
@entry = 0x3e (62) for TA table
loadi.z.0 r16, #0xcd7f
loadi.k.1 r16, #0xb2b2
store r16, r14, #62
@entry = 0x3f (63) for TA table
loadi.z.0 r16, #0x9fea
loadi.k.1 r16, #0x7575
store r16, r14, #63
@entry = 0x40 (64) for TA table
loadi.z.0 r16, #0x1b12
loadi.k.1 r16, #0x0909
store r16, r14, #64
@entry = 0x41 (65) for TA table
loadi.z.0 r16, #0x9e1d
loadi.k.1 r16, #0x8383
store r16, r14, #65
@entry = 0x42 (66) for TA table
loadi.z.0 r16, #0x7458
loadi.k.1 r16, #0x2c2c
store r16, r14, #66
@entry = 0x43 (67) for TA table
loadi.z.0 r16, #0x2e34
loadi.k.1 r16, #0x1a1a
store r16, r14, #67
@entry = 0x44 (68) for TA table
loadi.z.0 r16, #0x2d36
loadi.k.1 r16, #0x1b1b
store r16, r14, #68
@entry = 0x45 (69) for TA table
loadi.z.0 r16, #0xb2dc
loadi.k.1 r16, #0x6e6e
store r16, r14, #69
@entry = 0x46 (70) for TA table
loadi.z.0 r16, #0xeeb4
loadi.k.1 r16, #0x5a5a
store r16, r14, #70
@entry = 0x47 (71) for TA table
loadi.z.0 r16, #0xfb5b
loadi.k.1 r16, #0xa0a0
store r16, r14, #71
@entry = 0x48 (72) for TA table
loadi.z.0 r16, #0xf6a4
loadi.k.1 r16, #0x5252
store r16, r14, #72
@entry = 0x49 (73) for TA table
loadi.z.0 r16, #0x4d76
loadi.k.1 r16, #0x3b3b
store r16, r14, #73
@entry = 0x4a (74) for TA table
loadi.z.0 r16, #0x61b7
loadi.k.1 r16, #0xd6d6
store r16, r14, #74
@entry = 0x4b (75) for TA table
loadi.z.0 r16, #0xce7d
loadi.k.1 r16, #0xb3b3
store r16, r14, #75
@entry = 0x4c (76) for TA table
loadi.z.0 r16, #0x7b52
loadi.k.1 r16, #0x2929
store r16, r14, #76
@entry = 0x4d (77) for TA table
loadi.z.0 r16, #0x3edd
loadi.k.1 r16, #0xe3e3
store r16, r14, #77
@entry = 0x4e (78) for TA table
loadi.z.0 r16, #0x715e
loadi.k.1 r16, #0x2f2f
store r16, r14, #78
@entry = 0x4f (79) for TA table
loadi.z.0 r16, #0x9713
loadi.k.1 r16, #0x8484
store r16, r14, #79
@entry = 0x50 (80) for TA table
loadi.z.0 r16, #0xf5a6
loadi.k.1 r16, #0x5353
store r16, r14, #80
@entry = 0x51 (81) for TA table
loadi.z.0 r16, #0x68b9
loadi.k.1 r16, #0xd1d1
store r16, r14, #81
@entry = 0x52 (82) for TA table
loadi.z.0 r16, #0x0000
loadi.k.1 r16, #0x0000
store r16, r14, #82
@entry = 0x53 (83) for TA table
loadi.z.0 r16, #0x2cc1
loadi.k.1 r16, #0xeded
store r16, r14, #83
@entry = 0x54 (84) for TA table
loadi.z.0 r16, #0x6040
loadi.k.1 r16, #0x2020
store r16, r14, #84
@entry = 0x55 (85) for TA table
loadi.z.0 r16, #0x1fe3
loadi.k.1 r16, #0xfcfc
store r16, r14, #85
@entry = 0x56 (86) for TA table
loadi.z.0 r16, #0xc879
loadi.k.1 r16, #0xb1b1
store r16, r14, #86
@entry = 0x57 (87) for TA table
loadi.z.0 r16, #0xedb6
loadi.k.1 r16, #0x5b5b
store r16, r14, #87
@entry = 0x58 (88) for TA table
loadi.z.0 r16, #0xbed4
loadi.k.1 r16, #0x6a6a
store r16, r14, #88
@entry = 0x59 (89) for TA table
loadi.z.0 r16, #0x468d
loadi.k.1 r16, #0xcbcb
store r16, r14, #89
@entry = 0x5a (90) for TA table
loadi.z.0 r16, #0xd967
loadi.k.1 r16, #0xbebe
store r16, r14, #90
@entry = 0x5b (91) for TA table
loadi.z.0 r16, #0x4b72
loadi.k.1 r16, #0x3939
store r16, r14, #91
@entry = 0x5c (92) for TA table
loadi.z.0 r16, #0xde94
loadi.k.1 r16, #0x4a4a
store r16, r14, #92
@entry = 0x5d (93) for TA table
loadi.z.0 r16, #0xd498
loadi.k.1 r16, #0x4c4c
store r16, r14, #93
@entry = 0x5e (94) for TA table
loadi.z.0 r16, #0xe8b0
loadi.k.1 r16, #0x5858
store r16, r14, #94
@entry = 0x5f (95) for TA table
loadi.z.0 r16, #0x4a85
loadi.k.1 r16, #0xcfcf
store r16, r14, #95
@entry = 0x60 (96) for TA table
loadi.z.0 r16, #0x6bbb
loadi.k.1 r16, #0xd0d0
store r16, r14, #96
@entry = 0x61 (97) for TA table
loadi.z.0 r16, #0x2ac5
loadi.k.1 r16, #0xefef
store r16, r14, #97
@entry = 0x62 (98) for TA table
loadi.z.0 r16, #0xe54f
loadi.k.1 r16, #0xaaaa
store r16, r14, #98
@entry = 0x63 (99) for TA table
loadi.z.0 r16, #0x16ed
loadi.k.1 r16, #0xfbfb
store r16, r14, #99
@entry = 0x64 (100) for TA table
loadi.z.0 r16, #0xc586
loadi.k.1 r16, #0x4343
store r16, r14, #100
@entry = 0x65 (101) for TA table
loadi.z.0 r16, #0xd79a
loadi.k.1 r16, #0x4d4d
store r16, r14, #101
@entry = 0x66 (102) for TA table
loadi.z.0 r16, #0x5566
loadi.k.1 r16, #0x3333
store r16, r14, #102
@entry = 0x67 (103) for TA table
loadi.z.0 r16, #0x9411
loadi.k.1 r16, #0x8585
store r16, r14, #103
@entry = 0x68 (104) for TA table
loadi.z.0 r16, #0xcf8a
loadi.k.1 r16, #0x4545
store r16, r14, #104
@entry = 0x69 (105) for TA table
loadi.z.0 r16, #0x10e9
loadi.k.1 r16, #0xf9f9
store r16, r14, #105
@entry = 0x6a (106) for TA table
loadi.z.0 r16, #0x0604
loadi.k.1 r16, #0x0202
store r16, r14, #106
@entry = 0x6b (107) for TA table
loadi.z.0 r16, #0x81fe
loadi.k.1 r16, #0x7f7f
store r16, r14, #107
@entry = 0x6c (108) for TA table
loadi.z.0 r16, #0xf0a0
loadi.k.1 r16, #0x5050
store r16, r14, #108
@entry = 0x6d (109) for TA table
loadi.z.0 r16, #0x4478
loadi.k.1 r16, #0x3c3c
store r16, r14, #109
@entry = 0x6e (110) for TA table
loadi.z.0 r16, #0xba25
loadi.k.1 r16, #0x9f9f
store r16, r14, #110
@entry = 0x6f (111) for TA table
loadi.z.0 r16, #0xe34b
loadi.k.1 r16, #0xa8a8
store r16, r14, #111
@entry = 0x70 (112) for TA table
loadi.z.0 r16, #0xf3a2
loadi.k.1 r16, #0x5151
store r16, r14, #112
@entry = 0x71 (113) for TA table
loadi.z.0 r16, #0xfe5d
loadi.k.1 r16, #0xa3a3
store r16, r14, #113
@entry = 0x72 (114) for TA table
loadi.z.0 r16, #0xc080
loadi.k.1 r16, #0x4040
store r16, r14, #114
@entry = 0x73 (115) for TA table
loadi.z.0 r16, #0x8a05
loadi.k.1 r16, #0x8f8f
store r16, r14, #115
@entry = 0x74 (116) for TA table
loadi.z.0 r16, #0xad3f
loadi.k.1 r16, #0x9292
store r16, r14, #116
@entry = 0x75 (117) for TA table
loadi.z.0 r16, #0xbc21
loadi.k.1 r16, #0x9d9d
store r16, r14, #117
@entry = 0x76 (118) for TA table
loadi.z.0 r16, #0x4870
loadi.k.1 r16, #0x3838
store r16, r14, #118
@entry = 0x77 (119) for TA table
loadi.z.0 r16, #0x04f1
loadi.k.1 r16, #0xf5f5
store r16, r14, #119
@entry = 0x78 (120) for TA table
loadi.z.0 r16, #0xdf63
loadi.k.1 r16, #0xbcbc
store r16, r14, #120
@entry = 0x79 (121) for TA table
loadi.z.0 r16, #0xc177
loadi.k.1 r16, #0xb6b6
store r16, r14, #121
@entry = 0x7a (122) for TA table
loadi.z.0 r16, #0x75af
loadi.k.1 r16, #0xdada
store r16, r14, #122
@entry = 0x7b (123) for TA table
loadi.z.0 r16, #0x6342
loadi.k.1 r16, #0x2121
store r16, r14, #123
@entry = 0x7c (124) for TA table
loadi.z.0 r16, #0x3020
loadi.k.1 r16, #0x1010
store r16, r14, #124
@entry = 0x7d (125) for TA table
loadi.z.0 r16, #0x1ae5
loadi.k.1 r16, #0xffff
store r16, r14, #125
@entry = 0x7e (126) for TA table
loadi.z.0 r16, #0x0efd
loadi.k.1 r16, #0xf3f3
store r16, r14, #126
@entry = 0x7f (127) for TA table
loadi.z.0 r16, #0x6dbf
loadi.k.1 r16, #0xd2d2
store r16, r14, #127
@entry = 0x80 (128) for TA table
loadi.z.0 r16, #0x4c81
loadi.k.1 r16, #0xcdcd
store r16, r14, #128
@entry = 0x81 (129) for TA table
loadi.z.0 r16, #0x1418
loadi.k.1 r16, #0x0c0c
store r16, r14, #129
@entry = 0x82 (130) for TA table
loadi.z.0 r16, #0x3526
loadi.k.1 r16, #0x1313
store r16, r14, #130
@entry = 0x83 (131) for TA table
loadi.z.0 r16, #0x2fc3
loadi.k.1 r16, #0xecec
store r16, r14, #131
@entry = 0x84 (132) for TA table
loadi.z.0 r16, #0xe1be
loadi.k.1 r16, #0x5f5f
store r16, r14, #132
@entry = 0x85 (133) for TA table
loadi.z.0 r16, #0xa235
loadi.k.1 r16, #0x9797
store r16, r14, #133
@entry = 0x86 (134) for TA table
loadi.z.0 r16, #0xcc88
loadi.k.1 r16, #0x4444
store r16, r14, #134
@entry = 0x87 (135) for TA table
loadi.z.0 r16, #0x392e
loadi.k.1 r16, #0x1717
store r16, r14, #135
@entry = 0x88 (136) for TA table
loadi.z.0 r16, #0x5793
loadi.k.1 r16, #0xc4c4
store r16, r14, #136
@entry = 0x89 (137) for TA table
loadi.z.0 r16, #0xf255
loadi.k.1 r16, #0xa7a7
store r16, r14, #137
@entry = 0x8a (138) for TA table
loadi.z.0 r16, #0x82fc
loadi.k.1 r16, #0x7e7e
store r16, r14, #138
@entry = 0x8b (139) for TA table
loadi.z.0 r16, #0x477a
loadi.k.1 r16, #0x3d3d
store r16, r14, #139
@entry = 0x8c (140) for TA table
loadi.z.0 r16, #0xacc8
loadi.k.1 r16, #0x6464
store r16, r14, #140
@entry = 0x8d (141) for TA table
loadi.z.0 r16, #0xe7ba
loadi.k.1 r16, #0x5d5d
store r16, r14, #141
@entry = 0x8e (142) for TA table
loadi.z.0 r16, #0x2b32
loadi.k.1 r16, #0x1919
store r16, r14, #142
@entry = 0x8f (143) for TA table
loadi.z.0 r16, #0x95e6
loadi.k.1 r16, #0x7373
store r16, r14, #143
@entry = 0x90 (144) for TA table
loadi.z.0 r16, #0xa0c0
loadi.k.1 r16, #0x6060
store r16, r14, #144
@entry = 0x91 (145) for TA table
loadi.z.0 r16, #0x9819
loadi.k.1 r16, #0x8181
store r16, r14, #145
@entry = 0x92 (146) for TA table
loadi.z.0 r16, #0xd19e
loadi.k.1 r16, #0x4f4f
store r16, r14, #146
@entry = 0x93 (147) for TA table
loadi.z.0 r16, #0x7fa3
loadi.k.1 r16, #0xdcdc
store r16, r14, #147
@entry = 0x94 (148) for TA table
loadi.z.0 r16, #0x6644
loadi.k.1 r16, #0x2222
store r16, r14, #148
@entry = 0x95 (149) for TA table
loadi.z.0 r16, #0x7e54
loadi.k.1 r16, #0x2a2a
store r16, r14, #149
@entry = 0x96 (150) for TA table
loadi.z.0 r16, #0xab3b
loadi.k.1 r16, #0x9090
store r16, r14, #150
@entry = 0x97 (151) for TA table
loadi.z.0 r16, #0x830b
loadi.k.1 r16, #0x8888
store r16, r14, #151
@entry = 0x98 (152) for TA table
loadi.z.0 r16, #0xca8c
loadi.k.1 r16, #0x4646
store r16, r14, #152
@entry = 0x99 (153) for TA table
loadi.z.0 r16, #0x29c7
loadi.k.1 r16, #0xeeee
store r16, r14, #153
@entry = 0x9a (154) for TA table
loadi.z.0 r16, #0xd36b
loadi.k.1 r16, #0xb8b8
store r16, r14, #154
@entry = 0x9b (155) for TA table
loadi.z.0 r16, #0x3c28
loadi.k.1 r16, #0x1414
store r16, r14, #155
@entry = 0x9c (156) for TA table
loadi.z.0 r16, #0x79a7
loadi.k.1 r16, #0xdede
store r16, r14, #156
@entry = 0x9d (157) for TA table
loadi.z.0 r16, #0xe2bc
loadi.k.1 r16, #0x5e5e
store r16, r14, #157
@entry = 0x9e (158) for TA table
loadi.z.0 r16, #0x1d16
loadi.k.1 r16, #0x0b0b
store r16, r14, #158
@entry = 0x9f (159) for TA table
loadi.z.0 r16, #0x76ad
loadi.k.1 r16, #0xdbdb
store r16, r14, #159
@entry = 0xa0 (160) for TA table
loadi.z.0 r16, #0x3bdb
loadi.k.1 r16, #0xe0e0
store r16, r14, #160
@entry = 0xa1 (161) for TA table
loadi.z.0 r16, #0x5664
loadi.k.1 r16, #0x3232
store r16, r14, #161
@entry = 0xa2 (162) for TA table
loadi.z.0 r16, #0x4e74
loadi.k.1 r16, #0x3a3a
store r16, r14, #162
@entry = 0xa3 (163) for TA table
loadi.z.0 r16, #0x1e14
loadi.k.1 r16, #0x0a0a
store r16, r14, #163
@entry = 0xa4 (164) for TA table
loadi.z.0 r16, #0xdb92
loadi.k.1 r16, #0x4949
store r16, r14, #164
@entry = 0xa5 (165) for TA table
loadi.z.0 r16, #0x0a0c
loadi.k.1 r16, #0x0606
store r16, r14, #165
@entry = 0xa6 (166) for TA table
loadi.z.0 r16, #0x6c48
loadi.k.1 r16, #0x2424
store r16, r14, #166
@entry = 0xa7 (167) for TA table
loadi.z.0 r16, #0xe4b8
loadi.k.1 r16, #0x5c5c
store r16, r14, #167
@entry = 0xa8 (168) for TA table
loadi.z.0 r16, #0x5d9f
loadi.k.1 r16, #0xc2c2
store r16, r14, #168
@entry = 0xa9 (169) for TA table
loadi.z.0 r16, #0x6ebd
loadi.k.1 r16, #0xd3d3
store r16, r14, #169
@entry = 0xaa (170) for TA table
loadi.z.0 r16, #0xef43
loadi.k.1 r16, #0xacac
store r16, r14, #170
@entry = 0xab (171) for TA table
loadi.z.0 r16, #0xa6c4
loadi.k.1 r16, #0x6262
store r16, r14, #171
@entry = 0xac (172) for TA table
loadi.z.0 r16, #0xa839
loadi.k.1 r16, #0x9191
store r16, r14, #172
@entry = 0xad (173) for TA table
loadi.z.0 r16, #0xa431
loadi.k.1 r16, #0x9595
store r16, r14, #173
@entry = 0xae (174) for TA table
loadi.z.0 r16, #0x37d3
loadi.k.1 r16, #0xe4e4
store r16, r14, #174
@entry = 0xaf (175) for TA table
loadi.z.0 r16, #0x8bf2
loadi.k.1 r16, #0x7979
store r16, r14, #175
@entry = 0xb0 (176) for TA table
loadi.z.0 r16, #0x32d5
loadi.k.1 r16, #0xe7e7
store r16, r14, #176
@entry = 0xb1 (177) for TA table
loadi.z.0 r16, #0x438b
loadi.k.1 r16, #0xc8c8
store r16, r14, #177
@entry = 0xb2 (178) for TA table
loadi.z.0 r16, #0x596e
loadi.k.1 r16, #0x3737
store r16, r14, #178
@entry = 0xb3 (179) for TA table
loadi.z.0 r16, #0xb7da
loadi.k.1 r16, #0x6d6d
store r16, r14, #179
@entry = 0xb4 (180) for TA table
loadi.z.0 r16, #0x8c01
loadi.k.1 r16, #0x8d8d
store r16, r14, #180
@entry = 0xb5 (181) for TA table
loadi.z.0 r16, #0x64b1
loadi.k.1 r16, #0xd5d5
store r16, r14, #181
@entry = 0xb6 (182) for TA table
loadi.z.0 r16, #0xd29c
loadi.k.1 r16, #0x4e4e
store r16, r14, #182
@entry = 0xb7 (183) for TA table
loadi.z.0 r16, #0xe049
loadi.k.1 r16, #0xa9a9
store r16, r14, #183
@entry = 0xb8 (184) for TA table
loadi.z.0 r16, #0xb4d8
loadi.k.1 r16, #0x6c6c
store r16, r14, #184
@entry = 0xb9 (185) for TA table
loadi.z.0 r16, #0xfaac
loadi.k.1 r16, #0x5656
store r16, r14, #185
@entry = 0xba (186) for TA table
loadi.z.0 r16, #0x07f3
loadi.k.1 r16, #0xf4f4
store r16, r14, #186
@entry = 0xbb (187) for TA table
loadi.z.0 r16, #0x25cf
loadi.k.1 r16, #0xeaea
store r16, r14, #187
@entry = 0xbc (188) for TA table
loadi.z.0 r16, #0xafca
loadi.k.1 r16, #0x6565
store r16, r14, #188
@entry = 0xbd (189) for TA table
loadi.z.0 r16, #0x8ef4
loadi.k.1 r16, #0x7a7a
store r16, r14, #189
@entry = 0xbe (190) for TA table
loadi.z.0 r16, #0xe947
loadi.k.1 r16, #0xaeae
store r16, r14, #190
@entry = 0xbf (191) for TA table
loadi.z.0 r16, #0x1810
loadi.k.1 r16, #0x0808
store r16, r14, #191
@entry = 0xc0 (192) for TA table
loadi.z.0 r16, #0xd56f
loadi.k.1 r16, #0xbaba
store r16, r14, #192
@entry = 0xc1 (193) for TA table
loadi.z.0 r16, #0x88f0
loadi.k.1 r16, #0x7878
store r16, r14, #193
@entry = 0xc2 (194) for TA table
loadi.z.0 r16, #0x6f4a
loadi.k.1 r16, #0x2525
store r16, r14, #194
@entry = 0xc3 (195) for TA table
loadi.z.0 r16, #0x725c
loadi.k.1 r16, #0x2e2e
store r16, r14, #195
@entry = 0xc4 (196) for TA table
loadi.z.0 r16, #0x2438
loadi.k.1 r16, #0x1c1c
store r16, r14, #196
@entry = 0xc5 (197) for TA table
loadi.z.0 r16, #0xf157
loadi.k.1 r16, #0xa6a6
store r16, r14, #197
@entry = 0xc6 (198) for TA table
loadi.z.0 r16, #0xc773
loadi.k.1 r16, #0xb4b4
store r16, r14, #198
@entry = 0xc7 (199) for TA table
loadi.z.0 r16, #0x5197
loadi.k.1 r16, #0xc6c6
store r16, r14, #199
@entry = 0xc8 (200) for TA table
loadi.z.0 r16, #0x23cb
loadi.k.1 r16, #0xe8e8
store r16, r14, #200
@entry = 0xc9 (201) for TA table
loadi.z.0 r16, #0x7ca1
loadi.k.1 r16, #0xdddd
store r16, r14, #201
@entry = 0xca (202) for TA table
loadi.z.0 r16, #0x9ce8
loadi.k.1 r16, #0x7474
store r16, r14, #202
@entry = 0xcb (203) for TA table
loadi.z.0 r16, #0x213e
loadi.k.1 r16, #0x1f1f
store r16, r14, #203
@entry = 0xcc (204) for TA table
loadi.z.0 r16, #0xdd96
loadi.k.1 r16, #0x4b4b
store r16, r14, #204
@entry = 0xcd (205) for TA table
loadi.z.0 r16, #0xdc61
loadi.k.1 r16, #0xbdbd
store r16, r14, #205
@entry = 0xce (206) for TA table
loadi.z.0 r16, #0x860d
loadi.k.1 r16, #0x8b8b
store r16, r14, #206
@entry = 0xcf (207) for TA table
loadi.z.0 r16, #0x850f
loadi.k.1 r16, #0x8a8a
store r16, r14, #207
@entry = 0xd0 (208) for TA table
loadi.z.0 r16, #0x90e0
loadi.k.1 r16, #0x7070
store r16, r14, #208
@entry = 0xd1 (209) for TA table
loadi.z.0 r16, #0x427c
loadi.k.1 r16, #0x3e3e
store r16, r14, #209
@entry = 0xd2 (210) for TA table
loadi.z.0 r16, #0xc471
loadi.k.1 r16, #0xb5b5
store r16, r14, #210
@entry = 0xd3 (211) for TA table
loadi.z.0 r16, #0xaacc
loadi.k.1 r16, #0x6666
store r16, r14, #211
@entry = 0xd4 (212) for TA table
loadi.z.0 r16, #0xd890
loadi.k.1 r16, #0x4848
store r16, r14, #212
@entry = 0xd5 (213) for TA table
loadi.z.0 r16, #0x0506
loadi.k.1 r16, #0x0303
store r16, r14, #213
@entry = 0xd6 (214) for TA table
loadi.z.0 r16, #0x01f7
loadi.k.1 r16, #0xf6f6
store r16, r14, #214
@entry = 0xd7 (215) for TA table
loadi.z.0 r16, #0x121c
loadi.k.1 r16, #0x0e0e
store r16, r14, #215
@entry = 0xd8 (216) for TA table
loadi.z.0 r16, #0xa3c2
loadi.k.1 r16, #0x6161
store r16, r14, #216
@entry = 0xd9 (217) for TA table
loadi.z.0 r16, #0x5f6a
loadi.k.1 r16, #0x3535
store r16, r14, #217
@entry = 0xda (218) for TA table
loadi.z.0 r16, #0xf9ae
loadi.k.1 r16, #0x5757
store r16, r14, #218
@entry = 0xdb (219) for TA table
loadi.z.0 r16, #0xd069
loadi.k.1 r16, #0xb9b9
store r16, r14, #219
@entry = 0xdc (220) for TA table
loadi.z.0 r16, #0x9117
loadi.k.1 r16, #0x8686
store r16, r14, #220
@entry = 0xdd (221) for TA table
loadi.z.0 r16, #0x5899
loadi.k.1 r16, #0xc1c1
store r16, r14, #221
@entry = 0xde (222) for TA table
loadi.z.0 r16, #0x273a
loadi.k.1 r16, #0x1d1d
store r16, r14, #222
@entry = 0xdf (223) for TA table
loadi.z.0 r16, #0xb927
loadi.k.1 r16, #0x9e9e
store r16, r14, #223
@entry = 0xe0 (224) for TA table
loadi.z.0 r16, #0x38d9
loadi.k.1 r16, #0xe1e1
store r16, r14, #224
@entry = 0xe1 (225) for TA table
loadi.z.0 r16, #0x13eb
loadi.k.1 r16, #0xf8f8
store r16, r14, #225
@entry = 0xe2 (226) for TA table
loadi.z.0 r16, #0xb32b
loadi.k.1 r16, #0x9898
store r16, r14, #226
@entry = 0xe3 (227) for TA table
loadi.z.0 r16, #0x3322
loadi.k.1 r16, #0x1111
store r16, r14, #227
@entry = 0xe4 (228) for TA table
loadi.z.0 r16, #0xbbd2
loadi.k.1 r16, #0x6969
store r16, r14, #228
@entry = 0xe5 (229) for TA table
loadi.z.0 r16, #0x70a9
loadi.k.1 r16, #0xd9d9
store r16, r14, #229
@entry = 0xe6 (230) for TA table
loadi.z.0 r16, #0x8907
loadi.k.1 r16, #0x8e8e
store r16, r14, #230
@entry = 0xe7 (231) for TA table
loadi.z.0 r16, #0xa733
loadi.k.1 r16, #0x9494
store r16, r14, #231
@entry = 0xe8 (232) for TA table
loadi.z.0 r16, #0xb62d
loadi.k.1 r16, #0x9b9b
store r16, r14, #232
@entry = 0xe9 (233) for TA table
loadi.z.0 r16, #0x223c
loadi.k.1 r16, #0x1e1e
store r16, r14, #233
@entry = 0xea (234) for TA table
loadi.z.0 r16, #0x9215
loadi.k.1 r16, #0x8787
store r16, r14, #234
@entry = 0xeb (235) for TA table
loadi.z.0 r16, #0x20c9
loadi.k.1 r16, #0xe9e9
store r16, r14, #235
@entry = 0xec (236) for TA table
loadi.z.0 r16, #0x4987
loadi.k.1 r16, #0xcece
store r16, r14, #236
@entry = 0xed (237) for TA table
loadi.z.0 r16, #0xffaa
loadi.k.1 r16, #0x5555
store r16, r14, #237
@entry = 0xee (238) for TA table
loadi.z.0 r16, #0x7850
loadi.k.1 r16, #0x2828
store r16, r14, #238
@entry = 0xef (239) for TA table
loadi.z.0 r16, #0x7aa5
loadi.k.1 r16, #0xdfdf
store r16, r14, #239
@entry = 0xf0 (240) for TA table
loadi.z.0 r16, #0x8f03
loadi.k.1 r16, #0x8c8c
store r16, r14, #240
@entry = 0xf1 (241) for TA table
loadi.z.0 r16, #0xf859
loadi.k.1 r16, #0xa1a1
store r16, r14, #241
@entry = 0xf2 (242) for TA table
loadi.z.0 r16, #0x8009
loadi.k.1 r16, #0x8989
store r16, r14, #242
@entry = 0xf3 (243) for TA table
loadi.z.0 r16, #0x171a
loadi.k.1 r16, #0x0d0d
store r16, r14, #243
@entry = 0xf4 (244) for TA table
loadi.z.0 r16, #0xda65
loadi.k.1 r16, #0xbfbf
store r16, r14, #244
@entry = 0xf5 (245) for TA table
loadi.z.0 r16, #0x31d7
loadi.k.1 r16, #0xe6e6
store r16, r14, #245
@entry = 0xf6 (246) for TA table
loadi.z.0 r16, #0xc684
loadi.k.1 r16, #0x4242
store r16, r14, #246
@entry = 0xf7 (247) for TA table
loadi.z.0 r16, #0xb8d0
loadi.k.1 r16, #0x6868
store r16, r14, #247
@entry = 0xf8 (248) for TA table
loadi.z.0 r16, #0xc382
loadi.k.1 r16, #0x4141
store r16, r14, #248
@entry = 0xf9 (249) for TA table
loadi.z.0 r16, #0xb029
loadi.k.1 r16, #0x9999
store r16, r14, #249
@entry = 0xfa (250) for TA table
loadi.z.0 r16, #0x775a
loadi.k.1 r16, #0x2d2d
store r16, r14, #250
@entry = 0xfb (251) for TA table
loadi.z.0 r16, #0x111e
loadi.k.1 r16, #0x0f0f
store r16, r14, #251
@entry = 0xfc (252) for TA table
loadi.z.0 r16, #0xcb7b
loadi.k.1 r16, #0xb0b0
store r16, r14, #252
@entry = 0xfd (253) for TA table
loadi.z.0 r16, #0xfca8
loadi.k.1 r16, #0x5454
store r16, r14, #253
@entry = 0xfe (254) for TA table
loadi.z.0 r16, #0xd66d
loadi.k.1 r16, #0xbbbb
store r16, r14, #254
@entry = 0xff (255) for TA table
loadi.z.0 r16, #0x3a2c
loadi.k.1 r16, #0x1616
store r16, r14, #255
@ ---------------- Load PTLU Table -------------
@ Purpose: copy every TA entry from memory into the PTLU and derive the
@ TB/TC/TD entries from the same TA word with byte_perm.
@ r16 = TA table = (01 01 03 02) * S
@ r17 = TB table = (01 03 02 01) * S
@ r18 = TC table = (03 02 01 01) * S
@ r19 = TD table = (02 01 01 03) * S
@ where "*" is GF(256) finite-field multiplication and "S" is the S-box (a vector of 256 bytes)
@ r20 = (2103 -> 10 01 00 11) - byte_perm TA to TB
@ r21 = (1032 -> 01 00 11 10) - byte_perm TA to TC
@ r22 = (0321 -> 00 11 10 01) - byte_perm TA to TD
@ r13 = loop limit (one past the last entry index)
@ r14 = current entry index, also used as the TA memory address
@ define byte_perm table
loadi.z.0 r20, #0x0093 @ for TA to TB
loadi.z.0 r21, #0x004e @ for TA to TC
loadi.z.0 r22, #0x0039 @ for TA to TD
@ init for PTLU entry loop, 0-255
@ The loop increments r14 and then leaves as soon as r14 == r13, so the
@ limit must be 256 (0x0100) for entry 255 to be processed. A limit of
@ 0x00ff stops after entry 254 and leaves the last entry of all four
@ PTLU tables unwritten.
loadi.z.0 r13, #0x0100 @ r13 = 256, total number of entries for PTLU
loadi.z.0 r14, #0x0000 @ r14 = start address of TA memory.
@ r14 must be 0, or the loop code needs to change
@ =========== start ptlu-table loop for 256 entries
@ load TA table into PTLU
load r16, r14, #0; @ read 1 TA entry data into r16 (32 bits)
ptw.0 r14, r16; @ load r16 into ptlu TA
@ load TB table into PTLU
byte_perm r17, r16, r20; @ BytePerm r16 (TA) into r17 (TB)
ptw.1 r14, r17; @ load r17 into ptlu TB
@ load TC table into PTLU
byte_perm r18, r16, r21; @ BytePerm r16 (TA) into r18 (TC)
ptw.2 r14, r18; @ load r18 into ptlu TC
@ load TD table into PTLU
byte_perm r19, r16, r22; @ BytePerm r16 (TA) into r19 (TD)
ptw.3 r14, r19; @ load r19 into ptlu TD
addi r14, r14, #1; @ entry address r14++;
bne r13, r14, #0xfff6; @ PC -= 10, if current entry is not 256,
@ goto the loop beginning
@ NOTE: #0xfff6 (-10) is tied to the 10-instruction loop body; update it
@ if instructions are added to or removed from the loop.
@ ========== end of ptlu-table loop
@ ---------------- Load Input data -------------
@ r15 is reserved for storing initial address of memory block for all subkeys
loadi.z.0 r15, #0x0 @ subkey memory block starts from 0x0 (0)
@ load all subkeys into the memory block starting at r15
@ (this code is generated; the original comment does not name the generator)
loadi.z.0 r16, #0x4f3c @ load b0 of R[0] subkey into memory
loadi.k.1 r16, #0x09cf @ load b1 of R[0] subkey into memory
loadi.z.0 r17, #0x1588 @ load b2 of R[0] subkey into memory
loadi.k.1 r17, #0xabf7 @ load b3 of R[0] subkey into memory
loadi.z.0 r18, #0xd2a6 @ load b4 of R[0] subkey into memory
loadi.k.1 r18, #0x28ae @ load b5 of R[0] subkey into memory
loadi.z.0 r19, #0x1516 @ load b6 of R[0] subkey into memory
loadi.k.1 r19, #0x2b7e @ load b7 of R[0] subkey into memory
store r16, r15, #0
store r17, r15, #1
store r18, r15, #2
store r19, r15, #3
loadi.z.0 r16, #0x7605 @ load b0 of R[1] subkey into memory
loadi.k.1 r16, #0x2a6c @ load b1 of R[1] subkey into memory
loadi.z.0 r17, #0x3939 @ load b2 of R[1] subkey into memory
loadi.k.1 r17, #0x23a3 @ load b3 of R[1] subkey into memory
loadi.z.0 r18, #0x2cb1 @ load b4 of R[1] subkey into memory
loadi.k.1 r18, #0x8854 @ load b5 of R[1] subkey into memory
loadi.z.0 r19, #0xfe17 @ load b6 of R[1] subkey into memory
loadi.k.1 r19, #0xa0fa @ load b7 of R[1] subkey into memory
store r16, r15, #4
store r17, r15, #5
store r18, r15, #6
store r19, r15, #7
loadi.z.0 r16, #0xf67f @ load b0 of R[2] subkey into memory
loadi.k.1 r16, #0x7359 @ load b1 of R[2] subkey into memory
loadi.z.0 r17, #0x807a @ load b2 of R[2] subkey into memory
loadi.k.1 r17, #0x5935 @ load b3 of R[2] subkey into memory
loadi.z.0 r18, #0xb943 @ load b4 of R[2] subkey into memory
loadi.k.1 r18, #0x7a96 @ load b5 of R[2] subkey into memory
loadi.z.0 r19, #0x95f2 @ load b6 of R[2] subkey into memory
loadi.k.1 r19, #0xf2c2 @ load b7 of R[2] subkey into memory
store r16, r15, #8
store r17, r15, #9
store r18, r15, #10
store r19, r15, #11
loadi.z.0 r16, #0x883b @ load b0 of R[3] subkey into memory
loadi.k.1 r16, #0x6d7a @ load b1 of R[3] subkey into memory
loadi.z.0 r17, #0x7e44 @ load b2 of R[3] subkey into memory
loadi.k.1 r17, #0x1e23 @ load b3 of R[3] subkey into memory
loadi.z.0 r18, #0xfe3e @ load b4 of R[3] subkey into memory
loadi.k.1 r18, #0x4716 @ load b5 of R[3] subkey into memory
loadi.z.0 r19, #0x477d @ load b6 of R[3] subkey into memory
loadi.k.1 r19, #0x3d80 @ load b7 of R[3] subkey into memory
store r16, r15, #12
store r17, r15, #13
store r18, r15, #14
store r19, r15, #15
loadi.z.0 r16, #0xad00 @ load b0 of R[4] subkey into memory
loadi.k.1 r16, #0xdb0b @ load b1 of R[4] subkey into memory
loadi.z.0 r17, #0x253b @ load b2 of R[4] subkey into memory
loadi.k.1 r17, #0xb671 @ load b3 of R[4] subkey into memory
loadi.z.0 r18, #0x5b7f @ load b4 of R[4] subkey into memory
loadi.k.1 r18, #0xa852 @ load b5 of R[4] subkey into memory
loadi.z.0 r19, #0xa541 @ load b6 of R[4] subkey into memory
loadi.k.1 r19, #0xef44 @ load b7 of R[4] subkey into memory
store r16, r15, #16
store r17, r15, #17
store r18, r15, #18
store r19, r15, #19
loadi.z.0 r16, #0x15bc @ load b0 of R[5] subkey into memory
loadi.k.1 r16, #0x11f9 @ load b1 of R[5] subkey into memory
loadi.z.0 r17, #0xb8bc @ load b2 of R[5] subkey into memory
loadi.k.1 r17, #0xcaf2 @ load b3 of R[5] subkey into memory
loadi.z.0 r18, #0x9d87 @ load b4 of R[5] subkey into memory
loadi.k.1 r18, #0x7c83 @ load b5 of R[5] subkey into memory
loadi.z.0 r19, #0xc6f8 @ load b6 of R[5] subkey into memory
loadi.k.1 r19, #0xd4d1 @ load b7 of R[5] subkey into memory
store r16, r15, #20
store r17, r15, #21
store r18, r15, #22
store r19, r15, #23
loadi.z.0 r16, #0x93fd @ load b0 of R[6] subkey into memory
loadi.k.1 r16, #0xca00 @ load b1 of R[6] subkey into memory
loadi.z.0 r17, #0x8641 @ load b2 of R[6] subkey into memory
loadi.k.1 r17, #0xdbf9 @ load b3 of R[6] subkey into memory
loadi.z.0 r18, #0x3efd @ load b4 of R[6] subkey into memory
loadi.k.1 r18, #0x110b @ load b5 of R[6] subkey into memory
loadi.z.0 r19, #0xa37a @ load b6 of R[6] subkey into memory
loadi.k.1 r19, #0x6d88 @ load b7 of R[6] subkey into memory
store r16, r15, #24
store r17, r15, #25
store r18, r15, #26
store r19, r15, #27
loadi.z.0 r16, #0xdc4f @ load b0 of R[7] subkey into memory
loadi.k.1 r16, #0x4ea6 @ load b1 of R[7] subkey into memory
loadi.z.0 r17, #0x4fb2 @ load b2 of R[7] subkey into memory
loadi.k.1 r17, #0x84a6 @ load b3 of R[7] subkey into memory
loadi.z.0 r18, #0xc9f3 @ load b4 of R[7] subkey into memory
loadi.k.1 r18, #0x5f5f @ load b5 of R[7] subkey into memory
loadi.z.0 r19, #0xf70e @ load b6 of R[7] subkey into memory
loadi.k.1 r19, #0x4e54 @ load b7 of R[7] subkey into memory
store r16, r15, #28
store r17, r15, #29
store r18, r15, #30
store r19, r15, #31
loadi.z.0 r16, #0x292f @ load b0 of R[8] subkey into memory
loadi.k.1 r16, #0x7f8d @ load b1 of R[8] subkey into memory
loadi.z.0 r17, #0xf560 @ load b2 of R[8] subkey into memory
loadi.k.1 r17, #0x312b @ load b3 of R[8] subkey into memory
loadi.z.0 r18, #0xbad2 @ load b4 of R[8] subkey into memory
loadi.k.1 r18, #0xb58d @ load b5 of R[8] subkey into memory
loadi.z.0 r19, #0x7321 @ load b6 of R[8] subkey into memory
loadi.k.1 r19, #0xead2 @ load b7 of R[8] subkey into memory
store r16, r15, #32
store r17, r15, #33
store r18, r15, #34
store r19, r15, #35
loadi.z.0 r16, #0x006e @ load b0 of R[9] subkey into memory
loadi.k.1 r16, #0x575c @ load b1 of R[9] subkey into memory
loadi.z.0 r17, #0x2941 @ load b2 of R[9] subkey into memory
loadi.k.1 r17, #0x28d1 @ load b3 of R[9] subkey into memory
loadi.z.0 r18, #0xdc21 @ load b4 of R[9] subkey into memory
loadi.k.1 r18, #0x19fa @ load b5 of R[9] subkey into memory
loadi.z.0 r19, #0x66f3 @ load b6 of R[9] subkey into memory
loadi.k.1 r19, #0xac77 @ load b7 of R[9] subkey into memory
store r16, r15, #36
store r17, r15, #37
store r18, r15, #38
store r19, r15, #39
loadi.z.0 r16, #0x0ca6 @ load b0 of R[10] subkey into memory
loadi.k.1 r16, #0xb663 @ load b1 of R[10] subkey into memory
loadi.z.0 r17, #0x0cc8 @ load b2 of R[10] subkey into memory
loadi.k.1 r17, #0xe13f @ load b3 of R[10] subkey into memory
loadi.z.0 r18, #0x2589 @ load b4 of R[10] subkey into memory
loadi.k.1 r18, #0xc9ee @ load b5 of R[10] subkey into memory
loadi.z.0 r19, #0xf9a8 @ load b6 of R[10] subkey into memory
loadi.k.1 r19, #0xd014 @ load b7 of R[10] subkey into memory
store r16, r15, #40
store r17, r15, #41
store r18, r15, #42
store r19, r15, #43
@ r19, r18, r17, r16 = input 128-bit plain text
@ (test vector PLAINTEXT 3243f6a8 885a308d 313198a2 e0370734; r19 holds the
@ leftmost word, r16 the rightmost). Each register is built from two 16-bit
@ immediates: loadi.z.0 gives bits 15..0, loadi.k.1 gives bits 31..16.
loadi.z.0 r16, #0x0734 @ W0 bits 15..0
loadi.k.1 r16, #0xe037 @ W0 bits 31..16 -> r16 = 0xe0370734
loadi.z.0 r17, #0x98a2 @ W1 bits 15..0
loadi.k.1 r17, #0x3131 @ W1 bits 31..16 -> r17 = 0x313198a2
loadi.z.0 r18, #0x308d @ W2 bits 15..0
loadi.k.1 r18, #0x885a @ W2 bits 31..16 -> r18 = 0x885a308d
loadi.z.0 r19, #0xf6a8 @ W3 bits 15..0
loadi.k.1 r19, #0x3243 @ W3 bits 31..16 -> r19 = 0x3243f6a8
@ Load byte swap index for byte_perm
@ Each index packs four 2-bit source-byte selectors, listed below from result
@ byte 3 down to result byte 0 (this reading matches the per-instruction
@ result comments in the round code).
@ r5-r8 are used by rounds 1-9, r9-r12 by round 10; r5 and r9 hold the same value.
loadi.z.0 r5, #0x00c9 @ r5 = c9: (3, 2, 1, 0) => (3, 0, 2, 1) => (11 00 10 01) = c9
loadi.z.0 r6, #0x0027 @ r6 = 27: (3, 2, 1, 0) => (0, 2, 1, 3) => (00 10 01 11) = 27
loadi.z.0 r7, #0x00c6 @ r7 = c6: (3, 2, 1, 0) => (3, 0, 1, 2) => (11 00 01 10) = c6
loadi.z.0 r8, #0x00d2 @ r8 = d2: (3, 2, 1, 0) => (3, 1, 0, 2) => (11 01 00 10) = d2
loadi.z.0 r9, #0x00c9 @ r9 = c9: (3, 2, 1, 0) => (3, 0, 2, 1) => (11 00 10 01) = c9
loadi.z.0 r10, #0x0072 @ r10= 72: (3, 2, 1, 0) => (1, 3, 0, 2) => (01 11 00 10) = 72
loadi.z.0 r11, #0x006c @ r11= 6c: (3, 2, 1, 0) => (1, 2, 3, 0) => (01 10 11 00) = 6c
loadi.z.0 r12, #0x0087 @ r12= 87: (3, 2, 1, 0) => (2, 0, 1, 3) => (10 00 01 11) = 87
@ ---------------- AES-128 Encryption -------------
@ One block of AES encryption (initial key addition followed by 10 rounds)
@ r19, r18, r17, r16 contains 128-bit plaintext
@ r20 contains current word of subkey loaded from memory
@ r24-r21 contains byte indices (15,10,5,0,11,6,1,12,7,2,13,8,3,14,9,4) for round 1-9
@ r24-r21 contains byte indices (5,0,15,10,1,12,11,6,13,8,7,2,9,4,3,14) for round 10
@ PTLU tables contain AES tables (TD-TA)
@ --------------- AES 0 round operation ----------------------
@ Initial AddRoundKey: each state word is XORed with the matching word of R[0].
@ round input 4 words are stored in r19, r18, r17, r16
@ (output 4 words) = (input 4 words) xor (R[0] 4 words)
@ r15 is reserved for storing initial address of memory block for all subkeys
@ The "pc_s = ..., rNN = ..." comments record the expected register value
@ after the instruction above them (they match R[1].start of the test vector).
load r20, r15, #0 @ load W0 of subkey R[0] from memory
xor r16, r16, r20 @ xor input W0 with R[0] W0
@ store the round output W0 into r16
@ pc_s = 934, r16 = e9f84808
load r20, r15, #1 @ load W1 of subkey R[0] from memory
xor r17, r17, r20 @ xor input W1 with R[0] W1
@ store the round output W1 into r17
@ pc_s = 936, r17 = 9ac68d2a
load r20, r15, #2 @ load W2 of subkey R[0] from memory
xor r18, r18, r20 @ xor input W2 with R[0] W2
@ store the round output W2 into r18
@ pc_s = 938, r18 = a0f4e22b
load r20, r15, #3 @ load W3 of subkey R[0] from memory
xor r19, r19, r20 @ xor input W3 with R[0] W3
@ store the round output W3 into r19
@ pc_s = 940, r19 = 193de3be
@ round output 4 words are stored in r19, r18, r17, r16
@ --------------- AES 1st round operation ----------------------
@ The state bytes are first regrouped (byte_perm / shrp) into four lookup
@ words; each output word is then produced by one ptr.x4 table lookup
@ combined with the matching word of round key R[1].
@ round input 4 words are stored in r19, r18, r17, r16
@ convert state bytes from: r19-r16 = b15 b14 b13 b12 | b11 b10 b9 b8 | b7 b6 b5 b4 | b3 b2 b1 b0
@ to the new order of: r21 = b15 b10 b5 b0 | r24 = b11 b6 b1 b12 | r23 = b7 b2 b13 b8 | r22 = b3 b14 b9 b4
@ (r5-r8 supply the byte_perm indices; r20 and r25-r28 are temporaries)
byte_perm r20, r16, r5 @ r20 = b3 b0 b2 b1 - temp
shrp r25, r20, r17, #16 @ r25 = b2 b1 b7 b6 - temp
shrp r26, r17, r20, #16 @ r26 = b5 b4 b3 b0 - temp
byte_perm r20, r18, r5 @ r20 = b11 b8 b10 b9 - temp
shrp r27, r20, r19, #16 @ r27 = b10 b9 b15 b14 - temp
shrp r28, r19, r20, #16 @ r28 = b13 b12 b11 b8 - temp
byte_perm r25, r25, r6 @ r25 = b6 b1 b7 b2 - temp
byte_perm r26, r26, r7 @ r26 = b5 b0 b3 b4 - temp
byte_perm r27, r27, r6 @ r27 = b14 b9 b15 b10 - temp
byte_perm r28, r28, r7 @ r28 = b13 b8 b11 b12 - temp
shrp r21, r27, r26, #16 @ r21 = b15 b10 b5 b0 - r21 final for W3
shrp r22, r26, r27, #16 @ r22 = b3 b4 b14 b9 - temp
byte_perm r22, r22, r8 @ r22 = b3 b14 b9 b4 - r22 final for W0
shrp r23, r25, r28, #16 @ r23 = b7 b2 b13 b8 - r23 final for W1
shrp r24, r28, r25, #16 @ r24 = b11 b12 b6 b1 - temp
byte_perm r24, r24, r8 @ r24 = b11 b6 b1 b12 - r24 final for W2
@ parallel table lookup to generate round output for each word
@ r15 is reserved for storing initial address of memory block for all subkeys
load r20, r15, #4 @ load W0 of subkey R[1] from memory
ptr.x4 r16, r22, r20 @ lookup 4 tables, XOR the results with round subkey;
@ store the round output W0 into r16
@ pc_s = 958, r16 = 026a5049
load r20, r15, #5 @ load W1 of subkey R[1] from memory
ptr.x4 r17, r23, r20 @ lookup 4 tables, XOR the results with round subkey;
@ store the round output W1 into r17
@ pc_s = 960, r17 = 6b5bea43
load r20, r15, #6 @ load W2 of subkey R[1] from memory
ptr.x4 r18, r24, r20 @ lookup 4 tables, XOR the results with round subkey;
@ store the round output W2 into r18
@ pc_s = 962, r18 = 689f352b
load r20, r15, #7 @ load W3 of subkey R[1] from memory
ptr.x4 r19, r21, r20 @ lookup 4 tables, XOR the results with round subkey;
@ store the round output W3 into r19
@ pc_s = 964, r19 = a49c7ff2
@ round output 4 words are stored in r19, r18, r17, r16
@ --------------- AES 2nd round operation ----------------------
@ The state bytes are first regrouped (byte_perm / shrp) into four lookup
@ words; each output word is then produced by one ptr.x4 table lookup
@ combined with the matching word of round key R[2].
@ round input 4 words are stored in r19, r18, r17, r16
@ convert state bytes from: r19-r16 = b15 b14 b13 b12 | b11 b10 b9 b8 | b7 b6 b5 b4 | b3 b2 b1 b0
@ to the new order of: r21 = b15 b10 b5 b0 | r24 = b11 b6 b1 b12 | r23 = b7 b2 b13 b8 | r22 = b3 b14 b9 b4
@ (r5-r8 supply the byte_perm indices; r20 and r25-r28 are temporaries)
byte_perm r20, r16, r5 @ r20 = b3 b0 b2 b1 - temp
shrp r25, r20, r17, #16 @ r25 = b2 b1 b7 b6 - temp
shrp r26, r17, r20, #16 @ r26 = b5 b4 b3 b0 - temp
byte_perm r20, r18, r5 @ r20 = b11 b8 b10 b9 - temp
shrp r27, r20, r19, #16 @ r27 = b10 b9 b15 b14 - temp
shrp r28, r19, r20, #16 @ r28 = b13 b12 b11 b8 - temp
byte_perm r25, r25, r6 @ r25 = b6 b1 b7 b2 - temp
byte_perm r26, r26, r7 @ r26 = b5 b0 b3 b4 - temp
byte_perm r27, r27, r6 @ r27 = b14 b9 b15 b10 - temp
byte_perm r28, r28, r7 @ r28 = b13 b8 b11 b12 - temp
shrp r21, r27, r26, #16 @ r21 = b15 b10 b5 b0 - r21 final for W3
shrp r22, r26, r27, #16 @ r22 = b3 b4 b14 b9 - temp
byte_perm r22, r22, r8 @ r22 = b3 b14 b9 b4 - r22 final for W0
shrp r23, r25, r28, #16 @ r23 = b7 b2 b13 b8 - r23 final for W1
shrp r24, r28, r25, #16 @ r24 = b11 b12 b6 b1 - temp
byte_perm r24, r24, r8 @ r24 = b11 b6 b1 b12 - r24 final for W2
@ parallel table lookup to generate round output for each word
@ r15 is reserved for storing initial address of memory block for all subkeys
load r20, r15, #8 @ load W0 of subkey R[2] from memory
ptr.x4 r16, r22, r20 @ lookup 4 tables, XOR the results with round subkey;
@ store the round output W0 into r16
@ pc_s = 982, r16 = 6832469a
load r20, r15, #9 @ load W1 of subkey R[2] from memory
ptr.x4 r17, r23, r20 @ lookup 4 tables, XOR the results with round subkey;
@ store the round output W1 into r17
@ pc_s = 984, r17 = 82d24ad2
load r20, r15, #10 @ load W2 of subkey R[2] from memory
ptr.x4 r18, r24, r20 @ lookup 4 tables, XOR the results with round subkey;
@ store the round output W2 into r18
@ pc_s = 986, r18 = 61dde3ef
load r20, r15, #11 @ load W3 of subkey R[2] from memory
ptr.x4 r19, r21, r20 @ lookup 4 tables, XOR the results with round subkey;
@ store the round output W3 into r19
@ pc_s = 988, r19 = aa8f5f03
@ round output 4 words are stored in r19, r18, r17, r16
@ --------------- AES 3rd round operation ----------------------
@ The state bytes are first regrouped (byte_perm / shrp) into four lookup
@ words; each output word is then produced by one ptr.x4 table lookup
@ combined with the matching word of round key R[3].
@ round input 4 words are stored in r19, r18, r17, r16
@ convert state bytes from: r19-r16 = b15 b14 b13 b12 | b11 b10 b9 b8 | b7 b6 b5 b4 | b3 b2 b1 b0
@ to the new order of: r21 = b15 b10 b5 b0 | r24 = b11 b6 b1 b12 | r23 = b7 b2 b13 b8 | r22 = b3 b14 b9 b4
@ (r5-r8 supply the byte_perm indices; r20 and r25-r28 are temporaries)
byte_perm r20, r16, r5 @ r20 = b3 b0 b2 b1 - temp
shrp r25, r20, r17, #16 @ r25 = b2 b1 b7 b6 - temp
shrp r26, r17, r20, #16 @ r26 = b5 b4 b3 b0 - temp
byte_perm r20, r18, r5 @ r20 = b11 b8 b10 b9 - temp
shrp r27, r20, r19, #16 @ r27 = b10 b9 b15 b14 - temp
shrp r28, r19, r20, #16 @ r28 = b13 b12 b11 b8 - temp
byte_perm r25, r25, r6 @ r25 = b6 b1 b7 b2 - temp
byte_perm r26, r26, r7 @ r26 = b5 b0 b3 b4 - temp
byte_perm r27, r27, r6 @ r27 = b14 b9 b15 b10 - temp
byte_perm r28, r28, r7 @ r28 = b13 b8 b11 b12 - temp
shrp r21, r27, r26, #16 @ r21 = b15 b10 b5 b0 - r21 final for W3
shrp r22, r26, r27, #16 @ r22 = b3 b4 b14 b9 - temp
byte_perm r22, r22, r8 @ r22 = b3 b14 b9 b4 - r22 final for W0
shrp r23, r25, r28, #16 @ r23 = b7 b2 b13 b8 - r23 final for W1
shrp r24, r28, r25, #16 @ r24 = b11 b12 b6 b1 - temp
byte_perm r24, r24, r8 @ r24 = b11 b6 b1 b12 - r24 final for W2
@ parallel table lookup to generate round output for each word
@ r15 is reserved for storing initial address of memory block for all subkeys
load r20, r15, #12 @ load W0 of subkey R[3] from memory
ptr.x4 r16, r22, r20 @ lookup 4 tables, XOR the results with round subkey;
@ store the round output W0 into r16
@ pc_s = 1006, r16 = d65f58e7
load r20, r15, #13 @ load W1 of subkey R[3] from memory
ptr.x4 r17, r23, r20 @ lookup 4 tables, XOR the results with round subkey;
@ store the round output W1 into r17
@ pc_s = 1008, r17 = 4de3b138
load r20, r15, #14 @ load W2 of subkey R[3] from memory
ptr.x4 r18, r24, r20 @ lookup 4 tables, XOR the results with round subkey;
@ store the round output W2 into r18
@ pc_s = 1010, r18 = 671d9d0d
load r20, r15, #15 @ load W3 of subkey R[3] from memory
ptr.x4 r19, r21, r20 @ lookup 4 tables, XOR the results with round subkey;
@ store the round output W3 into r19
@ pc_s = 1012, r19 = 486c4eee
@ round output 4 words are stored in r19, r18, r17, r16
@ --------------- AES 4th round operation ----------------------
@ The state bytes are first regrouped (byte_perm / shrp) into four lookup
@ words; each output word is then produced by one ptr.x4 table lookup
@ combined with the matching word of round key R[4].
@ round input 4 words are stored in r19, r18, r17, r16
@ convert state bytes from: r19-r16 = b15 b14 b13 b12 | b11 b10 b9 b8 | b7 b6 b5 b4 | b3 b2 b1 b0
@ to the new order of: r21 = b15 b10 b5 b0 | r24 = b11 b6 b1 b12 | r23 = b7 b2 b13 b8 | r22 = b3 b14 b9 b4
@ (r5-r8 supply the byte_perm indices; r20 and r25-r28 are temporaries)
byte_perm r20, r16, r5 @ r20 = b3 b0 b2 b1 - temp
shrp r25, r20, r17, #16 @ r25 = b2 b1 b7 b6 - temp
shrp r26, r17, r20, #16 @ r26 = b5 b4 b3 b0 - temp
byte_perm r20, r18, r5 @ r20 = b11 b8 b10 b9 - temp
shrp r27, r20, r19, #16 @ r27 = b10 b9 b15 b14 - temp
shrp r28, r19, r20, #16 @ r28 = b13 b12 b11 b8 - temp
byte_perm r25, r25, r6 @ r25 = b6 b1 b7 b2 - temp
byte_perm r26, r26, r7 @ r26 = b5 b0 b3 b4 - temp
byte_perm r27, r27, r6 @ r27 = b14 b9 b15 b10 - temp
byte_perm r28, r28, r7 @ r28 = b13 b8 b11 b12 - temp
shrp r21, r27, r26, #16 @ r21 = b15 b10 b5 b0 - r21 final for W3
shrp r22, r26, r27, #16 @ r22 = b3 b4 b14 b9 - temp
byte_perm r22, r22, r8 @ r22 = b3 b14 b9 b4 - r22 final for W0
shrp r23, r25, r28, #16 @ r23 = b7 b2 b13 b8 - r23 final for W1
shrp r24, r28, r25, #16 @ r24 = b11 b12 b6 b1 - temp
byte_perm r24, r24, r8 @ r24 = b11 b6 b1 b12 - r24 final for W2
@ parallel table lookup to generate round output for each word
@ r15 is reserved for storing initial address of memory block for all subkeys
load r20, r15, #16 @ load W0 of subkey R[4] from memory
ptr.x4 r16, r22, r20 @ lookup 4 tables, XOR the results with round subkey;
@ store the round output W0 into r16
@ pc_s = 1030, r16 = 85b8be01
load r20, r15, #17 @ load W1 of subkey R[4] from memory
ptr.x4 r17, r23, r20 @ lookup 4 tables, XOR the results with round subkey;
@ store the round output W1 into r17
@ pc_s = 1032, r17 = d9b13550
load r20, r15, #18 @ load W2 of subkey R[4] from memory
ptr.x4 r18, r24, r20 @ lookup 4 tables, XOR the results with round subkey;
@ store the round output W2 into r18
@ pc_s = 1034, r18 = c86363c0
load r20, r15, #19 @ load W3 of subkey R[4] from memory
ptr.x4 r19, r21, r20 @ lookup 4 tables, XOR the results with round subkey;
@ store the round output W3 into r19
@ pc_s = 1036, r19 = e0927fe8
@ round output 4 words are stored in r19, r18, r17, r16
@ --------------- AES 5th round operation ----------------------
@ The state bytes are first regrouped (byte_perm / shrp) into four lookup
@ words; each output word is then produced by one ptr.x4 table lookup
@ combined with the matching word of round key R[5].
@ round input 4 words are stored in r19, r18, r17, r16
@ convert state bytes from: r19-r16 = b15 b14 b13 b12 | b11 b10 b9 b8 | b7 b6 b5 b4 | b3 b2 b1 b0
@ to the new order of: r21 = b15 b10 b5 b0 | r24 = b11 b6 b1 b12 | r23 = b7 b2 b13 b8 | r22 = b3 b14 b9 b4
@ (r5-r8 supply the byte_perm indices; r20 and r25-r28 are temporaries)
byte_perm r20, r16, r5 @ r20 = b3 b0 b2 b1 - temp
shrp r25, r20, r17, #16 @ r25 = b2 b1 b7 b6 - temp
shrp r26, r17, r20, #16 @ r26 = b5 b4 b3 b0 - temp
byte_perm r20, r18, r5 @ r20 = b11 b8 b10 b9 - temp
shrp r27, r20, r19, #16 @ r27 = b10 b9 b15 b14 - temp
shrp r28, r19, r20, #16 @ r28 = b13 b12 b11 b8 - temp
byte_perm r25, r25, r6 @ r25 = b6 b1 b7 b2 - temp
byte_perm r26, r26, r7 @ r26 = b5 b0 b3 b4 - temp
byte_perm r27, r27, r6 @ r27 = b14 b9 b15 b10 - temp
byte_perm r28, r28, r7 @ r28 = b13 b8 b11 b12 - temp
shrp r21, r27, r26, #16 @ r21 = b15 b10 b5 b0 - r21 final for W3
shrp r22, r26, r27, #16 @ r22 = b3 b4 b14 b9 - temp
byte_perm r22, r22, r8 @ r22 = b3 b14 b9 b4 - r22 final for W0
shrp r23, r25, r28, #16 @ r23 = b7 b2 b13 b8 - r23 final for W1
shrp r24, r28, r25, #16 @ r24 = b11 b12 b6 b1 - temp
byte_perm r24, r24, r8 @ r24 = b11 b6 b1 b12 - r24 final for W2
@ parallel table lookup to generate round output for each word
@ r15 is reserved for storing initial address of memory block for all subkeys
load r20, r15, #20 @ load W0 of subkey R[5] from memory
ptr.x4 r16, r22, r20 @ lookup 4 tables, XOR the results with round subkey;
@ store the round output W0 into r16
@ pc_s = 1054, r16 = 5db5d50c
load r20, r15, #21 @ load W1 of subkey R[5] from memory
ptr.x4 r17, r23, r20 @ lookup 4 tables, XOR the results with round subkey;
@ store the round output W1 into r17
@ pc_s = 1056, r17 = 7cc88b32
load r20, r15, #22 @ load W2 of subkey R[5] from memory
ptr.x4 r18, r24, r20 @ lookup 4 tables, XOR the results with round subkey;
@ store the round output W2 into r18
@ pc_s = 1058, r18 = c1924cef
load r20, r15, #23 @ load W3 of subkey R[5] from memory
ptr.x4 r19, r21, r20 @ lookup 4 tables, XOR the results with round subkey;
@ store the round output W3 into r19
@ pc_s = 1060, r19 = f1006f55
@ round output 4 words are stored in r19, r18, r17, r16
@ --------------- AES 6th round operation ----------------------
@ The state bytes are first regrouped (byte_perm / shrp) into four lookup
@ words; each output word is then produced by one ptr.x4 table lookup
@ combined with the matching word of round key R[6].
@ round input 4 words are stored in r19, r18, r17, r16
@ convert state bytes from: r19-r16 = b15 b14 b13 b12 | b11 b10 b9 b8 | b7 b6 b5 b4 | b3 b2 b1 b0
@ to the new order of: r21 = b15 b10 b5 b0 | r24 = b11 b6 b1 b12 | r23 = b7 b2 b13 b8 | r22 = b3 b14 b9 b4
@ (r5-r8 supply the byte_perm indices; r20 and r25-r28 are temporaries)
byte_perm r20, r16, r5 @ r20 = b3 b0 b2 b1 - temp
shrp r25, r20, r17, #16 @ r25 = b2 b1 b7 b6 - temp
shrp r26, r17, r20, #16 @ r26 = b5 b4 b3 b0 - temp
byte_perm r20, r18, r5 @ r20 = b11 b8 b10 b9 - temp
shrp r27, r20, r19, #16 @ r27 = b10 b9 b15 b14 - temp
shrp r28, r19, r20, #16 @ r28 = b13 b12 b11 b8 - temp
byte_perm r25, r25, r6 @ r25 = b6 b1 b7 b2 - temp
byte_perm r26, r26, r7 @ r26 = b5 b0 b3 b4 - temp
byte_perm r27, r27, r6 @ r27 = b14 b9 b15 b10 - temp
byte_perm r28, r28, r7 @ r28 = b13 b8 b11 b12 - temp
shrp r21, r27, r26, #16 @ r21 = b15 b10 b5 b0 - r21 final for W3
shrp r22, r26, r27, #16 @ r22 = b3 b4 b14 b9 - temp
byte_perm r22, r22, r8 @ r22 = b3 b14 b9 b4 - r22 final for W0
shrp r23, r25, r28, #16 @ r23 = b7 b2 b13 b8 - r23 final for W1
shrp r24, r28, r25, #16 @ r24 = b11 b12 b6 b1 - temp
byte_perm r24, r24, r8 @ r24 = b11 b6 b1 b12 - r24 final for W2
@ parallel table lookup to generate round output for each word
@ r15 is reserved for storing initial address of memory block for all subkeys
load r20, r15, #24 @ load W0 of subkey R[6] from memory
ptr.x4 r16, r22, r20 @ lookup 4 tables, XOR the results with round subkey;
@ store the round output W0 into r16
@ pc_s = 1078, r16 = fdd28b25
load r20, r15, #25 @ load W1 of subkey R[6] from memory
ptr.x4 r17, r23, r20 @ lookup 4 tables, XOR the results with round subkey;
@ store the round output W1 into r17
@ pc_s = 1080, r17 = e86472a9
load r20, r15, #26 @ load W2 of subkey R[6] from memory
ptr.x4 r18, r24, r20 @ lookup 4 tables, XOR the results with round subkey;
@ store the round output W2 into r18
@ pc_s = 1082, r18 = 3d41b77d
load r20, r15, #27 @ load W3 of subkey R[6] from memory
ptr.x4 r19, r21, r20 @ lookup 4 tables, XOR the results with round subkey;
@ store the round output W3 into r19
@ pc_s = 1084, r19 = 260e2e17
@ round output 4 words are stored in r19, r18, r17, r16
@ --------------- AES 7th round operation ----------------------
@ The state bytes are first regrouped (byte_perm / shrp) into four lookup
@ words; each output word is then produced by one ptr.x4 table lookup
@ combined with the matching word of round key R[7].
@ round input 4 words are stored in r19, r18, r17, r16
@ convert state bytes from: r19-r16 = b15 b14 b13 b12 | b11 b10 b9 b8 | b7 b6 b5 b4 | b3 b2 b1 b0
@ to the new order of: r21 = b15 b10 b5 b0 | r24 = b11 b6 b1 b12 | r23 = b7 b2 b13 b8 | r22 = b3 b14 b9 b4
@ (r5-r8 supply the byte_perm indices; r20 and r25-r28 are temporaries)
byte_perm r20, r16, r5 @ r20 = b3 b0 b2 b1 - temp
shrp r25, r20, r17, #16 @ r25 = b2 b1 b7 b6 - temp
shrp r26, r17, r20, #16 @ r26 = b5 b4 b3 b0 - temp
byte_perm r20, r18, r5 @ r20 = b11 b8 b10 b9 - temp
shrp r27, r20, r19, #16 @ r27 = b10 b9 b15 b14 - temp
shrp r28, r19, r20, #16 @ r28 = b13 b12 b11 b8 - temp
byte_perm r25, r25, r6 @ r25 = b6 b1 b7 b2 - temp
byte_perm r26, r26, r7 @ r26 = b5 b0 b3 b4 - temp
byte_perm r27, r27, r6 @ r27 = b14 b9 b15 b10 - temp
byte_perm r28, r28, r7 @ r28 = b13 b8 b11 b12 - temp
shrp r21, r27, r26, #16 @ r21 = b15 b10 b5 b0 - r21 final for W3
shrp r22, r26, r27, #16 @ r22 = b3 b4 b14 b9 - temp
byte_perm r22, r22, r8 @ r22 = b3 b14 b9 b4 - r22 final for W0
shrp r23, r25, r28, #16 @ r23 = b7 b2 b13 b8 - r23 final for W1
shrp r24, r28, r25, #16 @ r24 = b11 b12 b6 b1 - temp
byte_perm r24, r24, r8 @ r24 = b11 b6 b1 b12 - r24 final for W2
@ parallel table lookup to generate round output for each word
@ r15 is reserved for storing initial address of memory block for all subkeys
load r20, r15, #28 @ load W0 of subkey R[7] from memory
ptr.x4 r16, r22, r20 @ lookup 4 tables, XOR the results with round subkey;
@ store the round output W0 into r16
@ pc_s = 1102, r16 = 7a8c040c
load r20, r15, #29 @ load W1 of subkey R[7] from memory
ptr.x4 r17, r23, r20 @ lookup 4 tables, XOR the results with round subkey;
@ store the round output W1 into r17
@ pc_s = 1104, r17 = a3e01965
load r20, r15, #30 @ load W2 of subkey R[7] from memory
ptr.x4 r18, r24, r20 @ lookup 4 tables, XOR the results with round subkey;
@ store the round output W2 into r18
@ pc_s = 1106, r18 = 1949dc1f
load r20, r15, #31 @ load W3 of subkey R[7] from memory
ptr.x4 r19, r21, r20 @ lookup 4 tables, XOR the results with round subkey;
@ store the round output W3 into r19
@ pc_s = 1108, r19 = 5a4142b1
@ round output 4 words are stored in r19, r18, r17, r16
@ --------------- AES 8th round operation ----------------------
@ The state bytes are first regrouped (byte_perm / shrp) into four lookup
@ words; each output word is then produced by one ptr.x4 table lookup
@ combined with the matching word of round key R[8].
@ round input 4 words are stored in r19, r18, r17, r16
@ convert state bytes from: r19-r16 = b15 b14 b13 b12 | b11 b10 b9 b8 | b7 b6 b5 b4 | b3 b2 b1 b0
@ to the new order of: r21 = b15 b10 b5 b0 | r24 = b11 b6 b1 b12 | r23 = b7 b2 b13 b8 | r22 = b3 b14 b9 b4
@ (r5-r8 supply the byte_perm indices; r20 and r25-r28 are temporaries)
byte_perm r20, r16, r5 @ r20 = b3 b0 b2 b1 - temp
shrp r25, r20, r17, #16 @ r25 = b2 b1 b7 b6 - temp
shrp r26, r17, r20, #16 @ r26 = b5 b4 b3 b0 - temp
byte_perm r20, r18, r5 @ r20 = b11 b8 b10 b9 - temp
shrp r27, r20, r19, #16 @ r27 = b10 b9 b15 b14 - temp
shrp r28, r19, r20, #16 @ r28 = b13 b12 b11 b8 - temp
byte_perm r25, r25, r6 @ r25 = b6 b1 b7 b2 - temp
byte_perm r26, r26, r7 @ r26 = b5 b0 b3 b4 - temp
byte_perm r27, r27, r6 @ r27 = b14 b9 b15 b10 - temp
byte_perm r28, r28, r7 @ r28 = b13 b8 b11 b12 - temp
shrp r21, r27, r26, #16 @ r21 = b15 b10 b5 b0 - r21 final for W3
shrp r22, r26, r27, #16 @ r22 = b3 b4 b14 b9 - temp
byte_perm r22, r22, r8 @ r22 = b3 b14 b9 b4 - r22 final for W0
shrp r23, r25, r28, #16 @ r23 = b7 b2 b13 b8 - r23 final for W1
shrp r24, r28, r25, #16 @ r24 = b11 b12 b6 b1 - temp
byte_perm r24, r24, r8 @ r24 = b11 b6 b1 b12 - r24 final for W2
@ parallel table lookup to generate round output for each word
@ r15 is reserved for storing initial address of memory block for all subkeys
load r20, r15, #32 @ load W0 of subkey R[8] from memory
ptr.x4 r16, r22, r20 @ lookup 4 tables, XOR the results with round subkey;
@ store the round output W0 into r16
@ pc_s = 1126, r16 = 8596b0c5
load r20, r15, #33 @ load W1 of subkey R[8] from memory
ptr.x4 r17, r23, r20 @ lookup 4 tables, XOR the results with round subkey;
@ store the round output W1 into r17
@ pc_s = 1128, r17 = 655d98ad
load r20, r15, #34 @ load W2 of subkey R[8] from memory
ptr.x4 r18, r24, r20 @ lookup 4 tables, XOR the results with round subkey;
@ store the round output W2 into r18
@ pc_s = 1130, r18 = 0445332d
load r20, r15, #35 @ load W3 of subkey R[8] from memory
ptr.x4 r19, r21, r20 @ lookup 4 tables, XOR the results with round subkey;
@ store the round output W3 into r19
@ pc_s = 1132, r19 = ea835cf0
@ round output 4 words are stored in r19, r18, r17, r16
@ --------------- AES 9th round operation ----------------------
@ The state bytes are first regrouped (byte_perm / shrp) into four lookup
@ words; each output word is then produced by one ptr.x4 table lookup
@ combined with the matching word of round key R[9].
@ round input 4 words are stored in r19, r18, r17, r16
@ convert state bytes from: r19-r16 = b15 b14 b13 b12 | b11 b10 b9 b8 | b7 b6 b5 b4 | b3 b2 b1 b0
@ to the new order of: r21 = b15 b10 b5 b0 | r24 = b11 b6 b1 b12 | r23 = b7 b2 b13 b8 | r22 = b3 b14 b9 b4
@ (r5-r8 supply the byte_perm indices; r20 and r25-r28 are temporaries)
byte_perm r20, r16, r5 @ r20 = b3 b0 b2 b1 - temp
shrp r25, r20, r17, #16 @ r25 = b2 b1 b7 b6 - temp
shrp r26, r17, r20, #16 @ r26 = b5 b4 b3 b0 - temp
byte_perm r20, r18, r5 @ r20 = b11 b8 b10 b9 - temp
shrp r27, r20, r19, #16 @ r27 = b10 b9 b15 b14 - temp
shrp r28, r19, r20, #16 @ r28 = b13 b12 b11 b8 - temp
byte_perm r25, r25, r6 @ r25 = b6 b1 b7 b2 - temp
byte_perm r26, r26, r7 @ r26 = b5 b0 b3 b4 - temp
byte_perm r27, r27, r6 @ r27 = b14 b9 b15 b10 - temp
byte_perm r28, r28, r7 @ r28 = b13 b8 b11 b12 - temp
shrp r21, r27, r26, #16 @ r21 = b15 b10 b5 b0 - r21 final for W3
shrp r22, r26, r27, #16 @ r22 = b3 b4 b14 b9 - temp
byte_perm r22, r22, r8 @ r22 = b3 b14 b9 b4 - r22 final for W0
shrp r23, r25, r28, #16 @ r23 = b7 b2 b13 b8 - r23 final for W1
shrp r24, r28, r25, #16 @ r24 = b11 b12 b6 b1 - temp
byte_perm r24, r24, r8 @ r24 = b11 b6 b1 b12 - r24 final for W2
@ parallel table lookup to generate round output for each word
@ r15 is reserved for storing initial address of memory block for all subkeys
load r20, r15, #36 @ load W0 of subkey R[9] from memory
ptr.x4 r16, r22, r20 @ lookup 4 tables, XOR the results with round subkey;
@ store the round output W0 into r16
@ pc_s = 1150, r16 = 1bc342d2
load r20, r15, #37 @ load W1 of subkey R[9] from memory
ptr.x4 r17, r23, r20 @ lookup 4 tables, XOR the results with round subkey;
@ store the round output W1 into r17
@ pc_s = 1152, r17 = 8ba113e7
load r20, r15, #38 @ load W2 of subkey R[9] from memory
ptr.x4 r18, r24, r20 @ lookup 4 tables, XOR the results with round subkey;
@ store the round output W2 into r18
@ pc_s = 1154, r18 = 592e3884
load r20, r15, #39 @ load W3 of subkey R[9] from memory
ptr.x4 r19, r21, r20 @ lookup 4 tables, XOR the results with round subkey;
@ store the round output W3 into r19
@ pc_s = 1156, r19 = eb40f21e
@ round output 4 words are stored in r19, r18, r17, r16
@ --------------- AES 10th round operation ----------------------
@ Final round: same regrouping scheme as rounds 1-9 but with the permutation
@ indices in r9-r12, a different byte order inside each lookup word, and the
@ ptr.x4.2 lookup variant. The four result words (the ciphertext) are then
@ stored to memory.
@ round input 4 words are stored in r19, r18, r17, r16
@ convert state bytes from: r19-r16 = b15 b14 b13 b12 | b11 b10 b9 b8 | b7 b6 b5 b4 | b3 b2 b1 b0
@ to the new order of: r21 = b5 b0 b15 b10 | r24 = b1 b12 b11 b6 | r23 = b13 b8 b7 b2 | r22 = b9 b4 b3 b14
@ note: the byte order inside r24-r21 is different to R[1]-R[9]
byte_perm r20, r16, r9 @ r20 = b3 b0 b2 b1 - temp
shrp r25, r20, r17, #16 @ r25 = b2 b1 b7 b6 - temp
shrp r26, r17, r20, #16 @ r26 = b5 b4 b3 b0 - temp
byte_perm r20, r18, r9 @ r20 = b11 b8 b10 b9 - temp
shrp r27, r20, r19, #16 @ r27 = b10 b9 b15 b14 - temp
shrp r28, r19, r20, #16 @ r28 = b13 b12 b11 b8 - temp
byte_perm r25, r25, r10 @ r25 = b7 b2 b6 b1 - temp
byte_perm r26, r26, r11 @ r26 = b3 b4 b5 b0 - temp
byte_perm r27, r27, r10 @ r27 = b15 b10 b14 b9 - temp
byte_perm r28, r28, r11 @ r28 = b11 b12 b13 b8 - temp
shrp r21, r26, r27, #16 @ r21 = b5 b0 b15 b10 - r21 final for W3
shrp r22, r27, r26, #16 @ r22 = b14 b9 b3 b4 - temp
byte_perm r22, r22, r12 @ r22 = b9 b4 b3 b14 - r22 final for W0
shrp r23, r28, r25, #16 @ r23 = b13 b8 b7 b2 - r23 final for W1
shrp r24, r25, r28, #16 @ r24 = b6 b1 b11 b12 - temp
byte_perm r24, r24, r12 @ r24 = b1 b12 b11 b6 - r24 final for W2
@ parallel table lookup to generate round output for each word
@ NOTE(review): ptr.x4.2 (not ptr.x4) is used in this round only - presumably
@ the variant that yields the plain S-box byte from each table, since the
@ last AES round has no MixColumns; confirm against the PAX ISA definition.
@ r15 is reserved for storing initial address of memory block for all subkeys
load r20, r15, #40 @ load W0 of subkey R[10] from memory
ptr.x4.2 r16, r22, r20 @ lookup 4 tables, XOR the results with round subkey;
@ store the round output W0 into r16
@ pc_s = 1174, r16 = 196a0b32
load r20, r15, #41 @ load W1 of subkey R[10] from memory
ptr.x4.2 r17, r23, r20 @ lookup 4 tables, XOR the results with round subkey;
@ store the round output W1 into r17
@ pc_s = 1176, r17 = dc118597
load r20, r15, #42 @ load W2 of subkey R[10] from memory
ptr.x4.2 r18, r24, r20 @ lookup 4 tables, XOR the results with round subkey;
@ store the round output W2 into r18
@ pc_s = 1178, r18 = 02dc09fb
load r20, r15, #43 @ load W3 of subkey R[10] from memory
ptr.x4.2 r19, r21, r20 @ lookup 4 tables, XOR the results with round subkey;
@ store the round output W3 into r19
@ pc_s = 1180, r19 = 3925841d
@ write the ciphertext words W3..W0 to memory at r0 + 0xf0 .. r0 + 0xf3
@ (r0 is not written in the visible code - presumably it reads as 0; confirm)
store r19, r0, #0xf0
store r18, r0, #0xf1
store r17, r0, #0xf2
store r16, r0, #0xf3
@ round output 4 words are stored in r19, r18, r17, r16
@ Test Notes: 1). set ModelSim "run length" = 75us (total run time = 74509ns)
@ 2). round 0 instruction pc_s is 934
@ 3). total pax32 AES encryption takes 248 instructions.
@==================================================
.size main, .-main
.ident "GCC: (GNU) 4.1.0"