From 9bbf7d95f6070d7962a754eb74cf895bfb7f615e Mon Sep 17 00:00:00 2001 From: Logan oos Even <46396513+Logan007@users.noreply.github.com> Date: Thu, 7 Jan 2021 15:59:44 +0545 Subject: [PATCH] hardware-accelerated header encryption (#565) * hardware-accelerated header encryption * hardware-accelerated header encryption * hardware-accelerated header encryption --- include/speck.h | 33 +-- src/header_encryption.c | 14 +- src/speck.c | 505 +++++++++++++++++++--------------------- src/transform_speck.c | 4 +- 4 files changed, 255 insertions(+), 301 deletions(-) diff --git a/include/speck.h b/include/speck.h index 666fdfe..b6a8235 100644 --- a/include/speck.h +++ b/include/speck.h @@ -17,7 +17,7 @@ */ -// cipher SPECK -- 128 bit block size -- 256 bit key size -- CTR mode +// cipher SPECK -- 128 bit block size -- 128 and 256 bit key size -- CTR mode // taken from (and modified: removed pure crypto-stream generation and seperated key expansion) // https://github.com/nsacyber/simon-speck-supercop/blob/master/crypto_stream/speck128256ctr/ @@ -51,6 +51,7 @@ typedef struct { u256 rk[34]; u64 key[34]; + u32 keysize; } speck_context_t; @@ -67,6 +68,7 @@ typedef struct { typedef struct { u128 rk[34]; u64 key[34]; + u32 keysize; } speck_context_t; @@ -78,8 +80,9 @@ typedef struct { #define u128 uint64x2_t typedef struct { - u128 rk[34]; - u64 key[34]; + u128 rk[34]; + u64 key[34]; + u32 keysize; } speck_context_t; @@ -88,6 +91,7 @@ typedef struct { typedef struct { u64 key[34]; + u32 keysize; } speck_context_t; @@ -98,39 +102,26 @@ int speck_ctr (unsigned char *out, const unsigned char *in, unsigned long long i const unsigned char *n, speck_context_t *ctx); -int speck_init (const unsigned char *k, speck_context_t **ctx); +int speck_init (speck_context_t **ctx, const unsigned char *k, int keysize); int speck_deinit (speck_context_t *ctx); // ---------------------------------------------------------------------------------------------------------------- - - -// cipher SPECK -- 128 
bit block size -- 128 bit key size -- CTR mode -// used for header encryption, thus the postfix '_he' -// for now: just plain C -- AVX, SSE, NEON do not make sense for short header - - -int speck_he (unsigned char *out, const unsigned char *in, unsigned long long inlen, - const unsigned char *n, speck_context_t *ctx); - -int speck_expand_key_he (const unsigned char *k, speck_context_t *ctx); - - // ---------------------------------------------------------------------------------------------------------------- // cipher SPECK -- 96 bit block size -- 96 bit key size -- ECB mode // follows endianess rules as used in official implementation guide and NOT as in original 2013 cipher presentation -// used for IV in header encryption, thus the in/postfix 'he_iv' +// used for IV in header encryption // for now: just plain C -- probably no need for AVX, SSE, NEON -int speck_he_iv_encrypt (unsigned char *inout, speck_context_t *ctx); +int speck_96_encrypt (unsigned char *inout, speck_context_t *ctx); -int speck_he_iv_decrypt (unsigned char *inout, speck_context_t *ctx); +int speck_96_decrypt (unsigned char *inout, speck_context_t *ctx); -int speck_expand_key_he_iv (const unsigned char *k, speck_context_t *ctx); +int speck_96_expand_key (speck_context_t *ctx, const unsigned char *k); #endif // SPECK_H diff --git a/src/header_encryption.c b/src/header_encryption.c index c8e08c7..1813d74 100644 --- a/src/header_encryption.c +++ b/src/header_encryption.c @@ -40,15 +40,15 @@ uint32_t packet_header_decrypt (uint8_t packet[], uint16_t packet_len, uint32_t test_magic; // check for magic bytes and reasonable value in header len field // so, as a first step, decrypt 4 bytes only starting at byte 12 - speck_he((uint8_t*)&test_magic, &packet[12], 4, iv, (speck_context_t*)ctx); + speck_ctr((uint8_t*)&test_magic, &packet[12], 4, iv, (speck_context_t*)ctx); test_magic = be32toh(test_magic); if((((test_magic >> 8) << 8) == magic) /* check the thre uppermost bytes */ && (((uint8_t)test_magic) 
<= packet_len)) { /* lowest 8 bit of test_magic are header_len */ // decrypt the complete header - speck_he(&packet[12], &packet[12], (uint8_t)(test_magic) - 12, iv, (speck_context_t*)ctx); + speck_ctr(&packet[12], &packet[12], (uint8_t)(test_magic) - 12, iv, (speck_context_t*)ctx); // extract time stamp (first 64 bit) and checksum (last 16 bit) blended in IV - speck_he_iv_decrypt(iv, (speck_context_t*)ctx_iv); + speck_96_decrypt(iv, (speck_context_t*)ctx_iv); *checksum = be16toh(((uint16_t*)iv)[5]); *stamp = be64toh(((uint64_t*)iv)[0]); @@ -88,12 +88,12 @@ int32_t packet_header_encrypt (uint8_t packet[], uint8_t header_len, he_context_ iv32[3] = htobe32(magic); // blend checksum into 96-bit IV - speck_he_iv_encrypt(iv, (speck_context_t*)ctx_iv); + speck_96_encrypt(iv, (speck_context_t*)ctx_iv); memcpy(packet, iv, 16); packet[15] = header_len; - speck_he(&packet[12], &packet[12], header_len - 12, iv, (speck_context_t*)ctx); + speck_ctr(&packet[12], &packet[12], header_len - 12, iv, (speck_context_t*)ctx); return 0; } @@ -106,11 +106,11 @@ void packet_header_setup_key (const char *community_name, he_context_t **ctx, pearson_hash_128(key, (uint8_t*)community_name, N2N_COMMUNITY_SIZE); *ctx = (he_context_t*)calloc(1, sizeof (speck_context_t)); - speck_expand_key_he(key, (speck_context_t*)*ctx); + speck_init((speck_context_t**)ctx, key, 128); // hash again and use last 96 bit (skipping 4 bytes) as key for IV encryption // REMOVE as soon as checksum and replay protection get their own fields pearson_hash_128(key, key, sizeof (key)); *ctx_iv = (he_context_t*)calloc(1, sizeof (speck_context_t)); - speck_expand_key_he_iv(&key[4], (speck_context_t*)*ctx_iv); + speck_96_expand_key((speck_context_t*)*ctx_iv, &key[4]); } diff --git a/src/speck.c b/src/speck.c index b68a6a1..0a8285e 100644 --- a/src/speck.c +++ b/src/speck.c @@ -17,7 +17,7 @@ */ -// cipher SPECK -- 128 bit block size -- 256 bit key size -- CTR mode +// cipher SPECK -- 128 bit block size -- 128 and 256 bit key 
size -- CTR mode // taken from (and modified: removed pure crypto-stream generation and seperated key expansion) // https://github.com/nsacyber/simon-speck-supercop/blob/master/crypto_stream/speck128256ctr/ @@ -61,15 +61,11 @@ #define ROL(X,r) (XOR(SL(X,r),SR(X,(64-r)))) #define ROR(X,r) (XOR(SR(X,r),SL(X,(64-r)))) -#define numrounds 34 -#define numkeywords 4 - #define R(X,Y,k) (X=XOR(ADD(ROR8(X),Y),k), Y=XOR(ROL(Y,3),X)) #define Rx4(X,Y,k) (R(X[0],Y[0],k)) #define Rx8(X,Y,k) (R(X[0],Y[0],k), R(X[1],Y[1],k)) #define Rx12(X,Y,k) (R(X[0],Y[0],k), R(X[1],Y[1],k), R(X[2],Y[2],k)) - #define Rx16(X,Y,k) (X[0]=ROR8(X[0]), X[0]=ADD(X[0],Y[0]), X[1]=ROR8(X[1]), X[1]=ADD(X[1],Y[1]), \ X[2]=ROR8(X[2]), X[2]=ADD(X[2],Y[2]), X[3]=ROR8(X[3]), X[3]=ADD(X[3],Y[3]), \ X[0]=XOR(X[0],k), X[1]=XOR(X[1],k), X[2]=XOR(X[2],k), X[3]=XOR(X[3],k), \ @@ -79,18 +75,19 @@ Y[0]=XOR(Y[0],Z[0]), Y[1]=XOR(Y[1],Z[1]), Y[2]=XOR(Y[2],Z[2]), Y[3]=XOR(Y[3],Z[3]), \ Y[0]=XOR(X[0],Y[0]), Y[1]=XOR(X[1],Y[1]), Y[2]=XOR(X[2],Y[2]), Y[3]=XOR(X[3],Y[3])) -#define Rx2(x,y,k) (x[0]=RCS(x[0],8), x[1]=RCS(x[1],8), x[0]+=y[0], x[1]+=y[1], \ - x[0]^=k, x[1]^=k, y[0]=LCS(y[0],3), y[1]=LCS(y[1],3), y[0]^=x[0], y[1]^=x[1]) - -#define Rx1(x,y,k) (x[0]=RCS(x[0],8), x[0]+=y[0], x[0]^=k, y[0]=LCS(y[0],3), y[0]^=x[0]) - +#define Rx1(x,y,k) (x[0]=RCS(x[0],8), x[0]+=y[0], x[0]^=k, y[0]=LCS(y[0],3), y[0]^=x[0]) #define Rx1b(x,y,k) (x=RCS(x,8), x+=y, x^=k, y=LCS(y,3), y^=x) +#define Rx2(x,y,k) (x[0]=RCS(x[0],8), x[1]=RCS(x[1],8), x[0]+=y[0], x[1]+=y[1], \ + x[0]^=k, x[1]^=k, y[0]=LCS(y[0],3), y[1]=LCS(y[1],3), y[0]^=x[0], y[1]^=x[1]) -#define Encrypt(X,Y,k,n) (Rx##n(X,Y,k[0]), Rx##n(X,Y,k[1]), Rx##n(X,Y,k[2]), Rx##n(X,Y,k[3]), Rx##n(X,Y,k[4]), Rx##n(X,Y,k[5]), Rx##n(X,Y,k[6]), Rx##n(X,Y,k[7]), \ - Rx##n(X,Y,k[8]), Rx##n(X,Y,k[9]), Rx##n(X,Y,k[10]), Rx##n(X,Y,k[11]), Rx##n(X,Y,k[12]), Rx##n(X,Y,k[13]), Rx##n(X,Y,k[14]), Rx##n(X,Y,k[15]), \ - Rx##n(X,Y,k[16]), Rx##n(X,Y,k[17]), Rx##n(X,Y,k[18]), Rx##n(X,Y,k[19]), 
Rx##n(X,Y,k[20]), Rx##n(X,Y,k[21]), Rx##n(X,Y,k[22]), Rx##n(X,Y,k[23]), \ - Rx##n(X,Y,k[24]), Rx##n(X,Y,k[25]), Rx##n(X,Y,k[26]), Rx##n(X,Y,k[27]), Rx##n(X,Y,k[28]), Rx##n(X,Y,k[29]), Rx##n(X,Y,k[30]), Rx##n(X,Y,k[31]), \ - Rx##n(X,Y,k[32]), Rx##n(X,Y,k[33])) + +#define Encrypt_128(X,Y,k,n) (Rx##n(X,Y,k[0]), Rx##n(X,Y,k[1]), Rx##n(X,Y,k[2]), Rx##n(X,Y,k[3]), Rx##n(X,Y,k[4]), Rx##n(X,Y,k[5]), Rx##n(X,Y,k[6]), Rx##n(X,Y,k[7]), \ + Rx##n(X,Y,k[8]), Rx##n(X,Y,k[9]), Rx##n(X,Y,k[10]), Rx##n(X,Y,k[11]), Rx##n(X,Y,k[12]), Rx##n(X,Y,k[13]), Rx##n(X,Y,k[14]), Rx##n(X,Y,k[15]), \ + Rx##n(X,Y,k[16]), Rx##n(X,Y,k[17]), Rx##n(X,Y,k[18]), Rx##n(X,Y,k[19]), Rx##n(X,Y,k[20]), Rx##n(X,Y,k[21]), Rx##n(X,Y,k[22]), Rx##n(X,Y,k[23]), \ + Rx##n(X,Y,k[24]), Rx##n(X,Y,k[25]), Rx##n(X,Y,k[26]), Rx##n(X,Y,k[27]), Rx##n(X,Y,k[28]), Rx##n(X,Y,k[29]), Rx##n(X,Y,k[30]), Rx##n(X,Y,k[31])) + +#define Encrypt_256(X,Y,k,n) (Encrypt_128(X,Y,k,n), \ + Rx##n(X,Y,k[32]), Rx##n(X,Y,k[33])) #define RK(X,Y,k,key,i) (SET1(k[i],Y), key[i]=Y, X=RCS(X,8), X+=Y, X^=i, Y=LCS(Y,3), Y^=X) @@ -100,61 +97,68 @@ RK(B,A,k,key,21), RK(C,A,k,key,22), RK(D,A,k,key,23), RK(B,A,k,key,24), RK(C,A,k,key,25), RK(D,A,k,key,26), RK(B,A,k,key,27), \ RK(C,A,k,key,28), RK(D,A,k,key,29), RK(B,A,k,key,30), RK(C,A,k,key,31), RK(D,A,k,key,32), RK(B,A,k,key,33)) +#define Encrypt_Dispatcher(keysize) \ + u64 x[2], y[2]; \ + u256 X[4], Y[4], Z[4]; \ + \ + if(numbytes == 16) { \ + x[0] = nonce[1]; y[0] = nonce[0]; nonce[0]++; \ + Encrypt_##keysize(x, y, ctx->key, 1); \ + ((u64 *)out)[1] = x[0]; ((u64 *)out)[0] = y[0]; \ + return 0; \ + } \ + \ + if(numbytes == 32) { \ + x[0] = nonce[1]; y[0] = nonce[0]; nonce[0]++; \ + x[1] = nonce[1]; y[1] = nonce[0]; nonce[0]++; \ + Encrypt_##keysize(x , y, ctx->key, 2); \ + ((u64 *)out)[1] = x[0] ^ ((u64 *)in)[1]; ((u64 *)out)[0] = y[0] ^ ((u64 *)in)[0]; \ + ((u64 *)out)[3] = x[1] ^ ((u64 *)in)[3]; ((u64 *)out)[2] = y[1] ^ ((u64 *)in)[2]; \ + return 0; \ + } \ + \ + SET1(X[0], nonce[1]); SET4(Y[0], 
nonce[0]); \ + \ + if(numbytes == 64) \ + Encrypt_##keysize(X, Y, ctx->rk, 4); \ + else { \ + X[1] = X[0]; \ + Y[1] = ADD(Y[0], _four); \ + if(numbytes == 128) \ + Encrypt_##keysize(X, Y, ctx->rk, 8); \ + else { \ + X[2] = X[0]; \ + Y[2] = ADD(Y[1], _four); \ + if(numbytes == 192) \ + Encrypt_##keysize(X, Y, ctx->rk, 12); \ + else { \ + X[3] = X[0]; \ + Y[3] = ADD(Y[2], _four); \ + Encrypt_##keysize(X, Y, ctx->rk, 16); \ + } \ + } \ + } \ + \ + nonce[0] += (numbytes >> 4); \ + \ + XOR_STORE(in, out, X[0], Y[0]); \ + if (numbytes >= 128) \ + XOR_STORE(in + 64, out + 64, X[1], Y[1]); \ + if(numbytes >= 192) \ + XOR_STORE(in + 128, out + 128, X[2], Y[2]); \ + if(numbytes >= 256) \ + XOR_STORE(in + 192, out + 192, X[3], Y[3]); \ + \ + return 0 + static int speck_encrypt_xor(unsigned char *out, const unsigned char *in, u64 nonce[], speck_context_t *ctx, int numbytes) { - u64 x[2], y[2]; - u256 X[4], Y[4], Z[4]; - - if(numbytes == 16) { - x[0] = nonce[1]; y[0] = nonce[0]; nonce[0]++; - Encrypt(x, y, ctx->key, 1); - ((u64 *)out)[1] = x[0]; ((u64 *)out)[0] = y[0]; - return 0; + if(ctx->keysize == 256) { + Encrypt_Dispatcher(256); + } else { + Encrypt_Dispatcher(128); } - - if(numbytes == 32) { - x[0] = nonce[1]; y[0] = nonce[0]; nonce[0]++; - x[1] = nonce[1]; y[1] = nonce[0]; nonce[0]++; - Encrypt(x , y, ctx->key, 2); - ((u64 *)out)[1] = x[0] ^ ((u64 *)in)[1]; ((u64 *)out)[0] = y[0] ^ ((u64 *)in)[0]; - ((u64 *)out)[3] = x[1] ^ ((u64 *)in)[3]; ((u64 *)out)[2] = y[1] ^ ((u64 *)in)[2]; - return 0; - } - - SET1(X[0], nonce[1]); SET4(Y[0], nonce[0]); - - if(numbytes == 64) - Encrypt(X, Y, ctx->rk, 4); - else { - X[1] = X[0]; - Y[1] = ADD(Y[0], _four); - if(numbytes == 128) - Encrypt(X, Y, ctx->rk, 8); - else { - X[2] = X[0]; - Y[2] = ADD(Y[1], _four); - if(numbytes == 192) - Encrypt(X, Y, ctx->rk, 12); - else { - X[3] = X[0]; - Y[3] = ADD(Y[2], _four); - Encrypt(X, Y, ctx->rk, 16); - } - } - } - - nonce[0] += (numbytes >> 4); - - XOR_STORE(in, out, X[0], Y[0]); - if (numbytes 
>= 128) - XOR_STORE(in + 64, out + 64, X[1], Y[1]); - if(numbytes >= 192) - XOR_STORE(in + 128, out + 128, X[2], Y[2]); - if(numbytes >= 256) - XOR_STORE(in + 192, out + 192, X[3], Y[3]); - - return 0; } @@ -205,7 +209,7 @@ static int internal_speck_ctr(unsigned char *out, const unsigned char *in, unsig } if(inlen > 0) { - speck_encrypt_xor (block, in, nonce, ctx, 16); + speck_encrypt_xor(block, in, nonce, ctx, 16); for(i = 0; i < inlen; i++) out[i] = block[i] ^ in[i]; } @@ -214,15 +218,22 @@ static int internal_speck_ctr(unsigned char *out, const unsigned char *in, unsig } -static int speck_expand_key (const unsigned char *k, speck_context_t *ctx) { +static int speck_expand_key (speck_context_t *ctx, const unsigned char *k, int keysize) { u64 K[4]; size_t i; - for(i = 0; i < numkeywords; i++) - K[i] = ((u64 *)k)[i]; + for(i = 0; i < (keysize >> 6); i++) + K[i] = ((u64 *)k)[i]; - EK(K[0], K[1], K[2], K[3], ctx->rk, ctx->key); + // 128 bit has only two keys A and B thus replacing both C and D with B then + if(keysize == 128) { + EK(K[0], K[1], K[1], K[1], ctx->rk, ctx->key); + } else { + EK(K[0], K[1], K[2], K[3], ctx->rk, ctx->key); + } + + ctx->keysize = keysize; return 0; } @@ -270,15 +281,11 @@ static int speck_expand_key (const unsigned char *k, speck_context_t *ctx) { #define ROR8(X) (ROR(X,8)) #endif // SSS3 vs. 
SSE2 ---------------------------------------------- -#define numrounds 34 -#define numkeywords 4 - #define R(X,Y,k) (X=XOR(ADD(ROR8(X),Y),k), Y=XOR(ROL(Y,3),X)) #define Rx2(X,Y,k) (R(X[0],Y[0],k)) #define Rx4(X,Y,k) (R(X[0],Y[0],k), R(X[1],Y[1],k)) #define Rx6(X,Y,k) (R(X[0],Y[0],k), R(X[1],Y[1],k), R(X[2],Y[2],k)) - #define Rx8(X,Y,k) (X[0]=ROR8(X[0]), X[0]=ADD(X[0],Y[0]), X[1]=ROR8(X[1]), X[1]=ADD(X[1],Y[1]), \ X[2]=ROR8(X[2]), X[2]=ADD(X[2],Y[2]), X[3]=ROR8(X[3]), X[3]=ADD(X[3],Y[3]), \ X[0]=XOR(X[0],k), X[1]=XOR(X[1],k), X[2]=XOR(X[2],k), X[3]=XOR(X[3],k), \ @@ -289,14 +296,15 @@ static int speck_expand_key (const unsigned char *k, speck_context_t *ctx) { Y[0]=XOR(X[0],Y[0]), Y[1]=XOR(X[1],Y[1]), Y[2]=XOR(X[2],Y[2]), Y[3]=XOR(X[3],Y[3])) #define Rx1(x,y,k) (x[0]=RCS(x[0],8), x[0]+=y[0], x[0]^=k, y[0]=LCS(y[0],3), y[0]^=x[0]) - #define Rx1b(x,y,k) (x=RCS(x,8), x+=y, x^=k, y=LCS(y,3), y^=x) -#define Encrypt(X,Y,k,n) (Rx##n(X,Y,k[0]), Rx##n(X,Y,k[1]), Rx##n(X,Y,k[2]), Rx##n(X,Y,k[3]), Rx##n(X,Y,k[4]), Rx##n(X,Y,k[5]), Rx##n(X,Y,k[6]), Rx##n(X,Y,k[7]), \ - Rx##n(X,Y,k[8]), Rx##n(X,Y,k[9]), Rx##n(X,Y,k[10]), Rx##n(X,Y,k[11]), Rx##n(X,Y,k[12]), Rx##n(X,Y,k[13]), Rx##n(X,Y,k[14]), Rx##n(X,Y,k[15]), \ - Rx##n(X,Y,k[16]), Rx##n(X,Y,k[17]), Rx##n(X,Y,k[18]), Rx##n(X,Y,k[19]), Rx##n(X,Y,k[20]), Rx##n(X,Y,k[21]), Rx##n(X,Y,k[22]), Rx##n(X,Y,k[23]), \ - Rx##n(X,Y,k[24]), Rx##n(X,Y,k[25]), Rx##n(X,Y,k[26]), Rx##n(X,Y,k[27]), Rx##n(X,Y,k[28]), Rx##n(X,Y,k[29]), Rx##n(X,Y,k[30]), Rx##n(X,Y,k[31]), \ - Rx##n(X,Y,k[32]), Rx##n(X,Y,k[33])) +#define Encrypt_128(X,Y,k,n) (Rx##n(X,Y,k[0]), Rx##n(X,Y,k[1]), Rx##n(X,Y,k[2]), Rx##n(X,Y,k[3]), Rx##n(X,Y,k[4]), Rx##n(X,Y,k[5]), Rx##n(X,Y,k[6]), Rx##n(X,Y,k[7]), \ + Rx##n(X,Y,k[8]), Rx##n(X,Y,k[9]), Rx##n(X,Y,k[10]), Rx##n(X,Y,k[11]), Rx##n(X,Y,k[12]), Rx##n(X,Y,k[13]), Rx##n(X,Y,k[14]), Rx##n(X,Y,k[15]), \ + Rx##n(X,Y,k[16]), Rx##n(X,Y,k[17]), Rx##n(X,Y,k[18]), Rx##n(X,Y,k[19]), Rx##n(X,Y,k[20]), Rx##n(X,Y,k[21]), Rx##n(X,Y,k[22]), 
Rx##n(X,Y,k[23]), \ + Rx##n(X,Y,k[24]), Rx##n(X,Y,k[25]), Rx##n(X,Y,k[26]), Rx##n(X,Y,k[27]), Rx##n(X,Y,k[28]), Rx##n(X,Y,k[29]), Rx##n(X,Y,k[30]), Rx##n(X,Y,k[31])) + +#define Encrypt_256(X,Y,k,n) (Encrypt_128(X,Y,k,n), \ + Rx##n(X,Y,k[32]), Rx##n(X,Y,k[33])) #define RK(X,Y,k,key,i) (SET1(k[i],Y), key[i]=Y, X=RCS(X,8), X+=Y, X^=i, Y=LCS(Y,3), Y^=X) @@ -306,50 +314,57 @@ static int speck_expand_key (const unsigned char *k, speck_context_t *ctx) { RK(B,A,k,key,21), RK(C,A,k,key,22), RK(D,A,k,key,23), RK(B,A,k,key,24), RK(C,A,k,key,25), RK(D,A,k,key,26), RK(B,A,k,key,27), \ RK(C,A,k,key,28), RK(D,A,k,key,29), RK(B,A,k,key,30), RK(C,A,k,key,31), RK(D,A,k,key,32), RK(B,A,k,key,33)) +#define Encrypt_Dispatcher(keysize) \ + u64 x[2], y[2]; \ + u128 X[4], Y[4], Z[4]; \ + \ + if(numbytes == 16) { \ + x[0] = nonce[1]; y[0] = nonce[0]; nonce[0]++; \ + Encrypt_##keysize(x, y, ctx.key, 1); \ + ((u64 *)out)[1] = x[0]; ((u64 *)out)[0] = y[0]; \ + return 0; \ + } \ + \ + SET1(X[0], nonce[1]); SET2(Y[0], nonce[0]); \ + \ + if(numbytes == 32) \ + Encrypt_##keysize(X, Y, ctx.rk, 2); \ + else { \ + X[1] = X[0]; Y[1] = ADD(Y[0], _two); \ + if(numbytes == 64) \ + Encrypt_##keysize(X, Y, ctx.rk, 4); \ + else { \ + X[2] = X[0]; Y[2] = ADD(Y[1], _two); \ + if(numbytes == 96) \ + Encrypt_##keysize(X, Y, ctx.rk, 6); \ + else { \ + X[3] = X[0]; Y[3] = ADD(Y[2], _two); \ + Encrypt_##keysize(X, Y, ctx.rk, 8); \ + } \ + } \ + } \ + \ + nonce[0] += (numbytes >> 4); \ + \ + XOR_STORE(in, out, X[0], Y[0]); \ + if(numbytes >= 64) \ + XOR_STORE(in + 32, out + 32, X[1], Y[1]); \ + if(numbytes >= 96) \ + XOR_STORE(in + 64, out + 64, X[2], Y[2]); \ + if(numbytes >= 128) \ + XOR_STORE(in + 96, out + 96, X[3], Y[3]); \ + \ + return 0 + // attention: ctx is provided by value as it is faster in this case, astonishingly static int speck_encrypt_xor (unsigned char *out, const unsigned char *in, u64 nonce[], const speck_context_t ctx, int numbytes) { - u64 x[2], y[2]; - u128 X[4], Y[4], Z[4]; - - if(numbytes 
== 16) { - x[0] = nonce[1]; y[0] = nonce[0]; nonce[0]++; - Encrypt(x, y, ctx.key, 1); - ((u64 *)out)[1] = x[0]; ((u64 *)out)[0] = y[0]; - return 0; + if(ctx.keysize == 256) { + Encrypt_Dispatcher(256); + } else { + Encrypt_Dispatcher(128); } - - SET1(X[0], nonce[1]); SET2 (Y[0], nonce[0]); - - if(numbytes == 32) - Encrypt(X, Y, ctx.rk, 2); - else { - X[1] = X[0]; Y[1] = ADD(Y[0], _two); - if(numbytes == 64) - Encrypt(X, Y, ctx.rk, 4); - else { - X[2] = X[0]; Y[2] = ADD(Y[1], _two); - if(numbytes == 96) - Encrypt(X, Y, ctx.rk, 6); - else { - X[3] = X[0]; Y[3] = ADD(Y[2], _two); - Encrypt(X, Y, ctx.rk, 8); - } - } - } - - nonce[0] += (numbytes >> 4); - - XOR_STORE(in, out, X[0], Y[0]); - if(numbytes >= 64) - XOR_STORE(in + 32, out + 32, X[1], Y[1]); - if(numbytes >= 96) - XOR_STORE(in + 64, out + 64, X[2], Y[2]); - if(numbytes >= 128) - XOR_STORE(in + 96, out + 96, X[3], Y[3]); - - return 0; } @@ -405,15 +420,22 @@ static int internal_speck_ctr (unsigned char *out, const unsigned char *in, unsi } -static int speck_expand_key (const unsigned char *k, speck_context_t *ctx) { +static int speck_expand_key (speck_context_t *ctx, const unsigned char *k, int keysize) { u64 K[4]; size_t i; - for(i = 0; i < numkeywords; i++) + for(i = 0; i < (keysize >> 6 ); i++) K[i] = ((u64 *)k)[i]; - EK(K[0], K[1], K[2], K[3], ctx->rk, ctx->key); + // 128 bit has only two keys A and B thus replacing both C and D with B then + if(keysize == 128) { + EK(K[0], K[1], K[1], K[1], ctx->rk, ctx->key); + } else { + EK(K[0], K[1], K[2], K[3], ctx->rk, ctx->key); + } + + ctx->keysize = keysize; return 0; } @@ -448,13 +470,9 @@ static int speck_expand_key (const unsigned char *k, speck_context_t *ctx) { #define ROR8(X) SET(vtbl1_u8((uint8x8_t)vget_low_u64(X),tableR), vtbl1_u8((uint8x8_t)vget_high_u64(X),tableR)) #define ROL8(X) SET(vtbl1_u8((uint8x8_t)vget_low_u64(X),tableL), vtbl1_u8((uint8x8_t)vget_high_u64(X),tableL)) -#define numrounds 34 -#define numkeywords 4 - #define R(X,Y,k) 
(X=XOR(ADD(ROR8(X),Y),k), Y=XOR(ROL(Y,3),X)) #define Rx2(X,Y,k) (R(X[0],Y[0],k)) - #define Rx4(X,Y,k) (R(X[0],Y[0],k), R(X[1],Y[1],k)) #define Rx6(X,Y,k) (R(X[0],Y[0],k), R(X[1],Y[1],k), R(X[2],Y[2],k)) #define Rx8(X,Y,k) (X[0]=ROR8(X[0]), X[0]=ADD(X[0],Y[0]), X[0]=XOR(X[0],k), X[1]=ROR8(X[1]), X[1]=ADD(X[1],Y[1]), X[1]=XOR(X[1],k), \ @@ -465,14 +483,15 @@ static int speck_expand_key (const unsigned char *k, speck_context_t *ctx) { Y[0]=XOR(X[0],Y[0]), Y[1]=XOR(X[1],Y[1]), Y[2]=XOR(X[2],Y[2]), Y[3]=XOR(X[3],Y[3])) #define Rx1(x,y,k) (x[0]=RCS(x[0],8), x[0]+=y[0], x[0]^=k, y[0]=LCS(y[0],3), y[0]^=x[0]) - #define Rx1b(x,y,k) (x=RCS(x,8), x+=y, x^=k, y=LCS(y,3), y^=x) -#define Encrypt(X,Y,k,n) (Rx##n(X,Y,k[0]), Rx##n(X,Y,k[1]), Rx##n(X,Y,k[2]), Rx##n(X,Y,k[3]), Rx##n(X,Y,k[4]), Rx##n(X,Y,k[5]), Rx##n(X,Y,k[6]), Rx##n(X,Y,k[7]), \ - Rx##n(X,Y,k[8]), Rx##n(X,Y,k[9]), Rx##n(X,Y,k[10]), Rx##n(X,Y,k[11]), Rx##n(X,Y,k[12]), Rx##n(X,Y,k[13]), Rx##n(X,Y,k[14]), Rx##n(X,Y,k[15]), \ - Rx##n(X,Y,k[16]), Rx##n(X,Y,k[17]), Rx##n(X,Y,k[18]), Rx##n(X,Y,k[19]), Rx##n(X,Y,k[20]), Rx##n(X,Y,k[21]), Rx##n(X,Y,k[22]), Rx##n(X,Y,k[23]), \ - Rx##n(X,Y,k[24]), Rx##n(X,Y,k[25]), Rx##n(X,Y,k[26]), Rx##n(X,Y,k[27]), Rx##n(X,Y,k[28]), Rx##n(X,Y,k[29]), Rx##n(X,Y,k[30]), Rx##n(X,Y,k[31]), \ - Rx##n(X,Y,k[32]), Rx##n(X,Y,k[33])) +#define Encrypt_128(X,Y,k,n) (Rx##n(X,Y,k[0]), Rx##n(X,Y,k[1]), Rx##n(X,Y,k[2]), Rx##n(X,Y,k[3]), Rx##n(X,Y,k[4]), Rx##n(X,Y,k[5]), Rx##n(X,Y,k[6]), Rx##n(X,Y,k[7]), \ + Rx##n(X,Y,k[8]), Rx##n(X,Y,k[9]), Rx##n(X,Y,k[10]), Rx##n(X,Y,k[11]), Rx##n(X,Y,k[12]), Rx##n(X,Y,k[13]), Rx##n(X,Y,k[14]), Rx##n(X,Y,k[15]), \ + Rx##n(X,Y,k[16]), Rx##n(X,Y,k[17]), Rx##n(X,Y,k[18]), Rx##n(X,Y,k[19]), Rx##n(X,Y,k[20]), Rx##n(X,Y,k[21]), Rx##n(X,Y,k[22]), Rx##n(X,Y,k[23]), \ + Rx##n(X,Y,k[24]), Rx##n(X,Y,k[25]), Rx##n(X,Y,k[26]), Rx##n(X,Y,k[27]), Rx##n(X,Y,k[28]), Rx##n(X,Y,k[29]), Rx##n(X,Y,k[30]), Rx##n(X,Y,k[31])) + +#define Encrypt_256(X,Y,k,n) (Encrypt_128(X,Y,k,n), \ + 
Rx##n(X,Y,k[32]), Rx##n(X,Y,k[33])) #define RK(X,Y,k,key,i) (SET1(k[i],Y), key[i]=Y, X=RCS(X,8), X+=Y, X^=i, Y=LCS(Y,3), Y^=X) @@ -482,47 +501,54 @@ static int speck_expand_key (const unsigned char *k, speck_context_t *ctx) { RK(B,A,k,key,21), RK(C,A,k,key,22), RK(D,A,k,key,23), RK(B,A,k,key,24), RK(C,A,k,key,25), RK(D,A,k,key,26), RK(B,A,k,key,27), \ RK(C,A,k,key,28), RK(D,A,k,key,29), RK(B,A,k,key,30), RK(C,A,k,key,31), RK(D,A,k,key,32), RK(B,A,k,key,33)) +#define Encrypt_Dispatcher(keysize) \ + u64 x[2], y[2]; \ + u128 X[4], Y[4], Z[4]; \ + \ + if(numbytes == 16) { \ + x[0] = nonce[1]; y[0]=nonce[0]; nonce[0]++; \ + Encrypt_##keysize(x, y, ctx->key, 1); \ + ((u64 *)out)[1] = x[0]; ((u64 *)out)[0] = y[0]; \ + return 0; \ + } \ + \ + SET1(X[0], nonce[1]); SET2(Y[0], nonce[0]); \ + \ + if(numbytes == 32) \ + Encrypt_##keysize(X, Y, ctx->rk, 2); \ + else { \ + X[1] = X[0]; SET2(Y[1], nonce[0]); \ + if(numbytes == 64) \ + Encrypt_##keysize(X, Y, ctx->rk, 4); \ + else { \ + X[2] = X[0]; SET2(Y[2], nonce[0]); \ + if(numbytes == 96) \ + Encrypt_##keysize(X, Y, ctx->rk, 6); \ + else { \ + X[3] = X[0]; SET2(Y[3], nonce[0]); \ + Encrypt_##keysize(X, Y, ctx->rk, 8); \ + } \ + } \ + } \ + \ + XOR_STORE(in, out, X[0], Y[0]); \ + if(numbytes >= 64) \ + XOR_STORE(in + 32, out + 32, X[1], Y[1]); \ + if(numbytes >= 96) \ + XOR_STORE(in + 64, out + 64, X[2], Y[2]); \ + if(numbytes >= 128) \ + XOR_STORE(in + 96, out + 96, X[3], Y[3]); \ + \ + return 0 + static int speck_encrypt_xor (unsigned char *out, const unsigned char *in, u64 nonce[], speck_context_t *ctx, int numbytes) { - u64 x[2], y[2]; - u128 X[4], Y[4], Z[4]; - - if(numbytes == 16) { - x[0] = nonce[1]; y[0]=nonce[0]; nonce[0]++; - Encrypt(x, y, ctx->key, 1); - ((u64 *)out)[1] = x[0]; ((u64 *)out)[0] = y[0]; - return 0; + if(ctx->keysize == 256) { + Encrypt_Dispatcher(256); + } else { + Encrypt_Dispatcher(128); } - - SET1(X[0], nonce[1]); SET2(Y[0], nonce[0]); - - if(numbytes == 32) - Encrypt(X, Y, ctx->rk, 2); - else { - 
X[1] = X[0]; SET2(Y[1], nonce[0]); - if(numbytes == 64) - Encrypt(X, Y, ctx->rk, 4); - else { - X[2] = X[0]; SET2(Y[2], nonce[0]); - if(numbytes == 96) - Encrypt(X, Y, ctx->rk, 6); - else { - X[3] = X[0]; SET2(Y[3], nonce[0]); - Encrypt(X, Y, ctx->rk, 8); - } - } - } - - XOR_STORE(in, out, X[0], Y[0]); - if(numbytes >= 64) - XOR_STORE(in + 32, out + 32, X[1], Y[1]); - if(numbytes >= 96) - XOR_STORE(in + 64, out + 64, X[2], Y[2]); - if(numbytes >= 128) - XOR_STORE(in + 96, out + 96, X[3], Y[3]); - - return 0; } @@ -577,15 +603,22 @@ static int internal_speck_ctr (unsigned char *out, const unsigned char *in, unsi } -static int speck_expand_key (const unsigned char *k, speck_context_t *ctx) { +static int speck_expand_key (speck_context_t *ctx, const unsigned char *k, int keysize) { u64 K[4]; size_t i; - for(i = 0; i < numkeywords; i++) + for(i = 0; i < (keysize >> 6); i++) K[i] = ((u64 *)k)[i]; - EK(K[0], K[1], K[2], K[3], ctx->rk, ctx->key); + // 128 bit has only two keys A and B thus replacing both C and D with B then + if(keysize == 128) { + EK(K[0], K[1], K[1], K[1], ctx->rk, ctx->key); + } else { + EK(K[0], K[1], K[2], K[3], ctx->rk, ctx->key); + } + + ctx->keysize = keysize; return 0; } @@ -599,11 +632,11 @@ static int speck_expand_key (const unsigned char *k, speck_context_t *ctx) { #define R(x,y,k) (x=ROR(x,8), x+=y, x^=k, y=ROL(y,3), y^=x) -static int speck_encrypt (u64 *u, u64 *v, speck_context_t *ctx) { +static int speck_encrypt (u64 *u, u64 *v, speck_context_t *ctx, int numrounds) { u64 i, x = *u, y = *v; - for(i = 0; i < 34; i++) + for(i = 0; i < numrounds; i++) R(x, y, ctx->key[i]); *u = x; *v = y; @@ -617,6 +650,7 @@ static int internal_speck_ctr (unsigned char *out, const unsigned char *in, unsi u64 i, nonce[2], x, y, t; unsigned char *block = malloc(16); + int numrounds = (ctx->keysize == 256)?34:32; if(!inlen) { free(block); @@ -628,7 +662,7 @@ static int internal_speck_ctr (unsigned char *out, const unsigned char *in, unsi t=0; while(inlen >= 16) { 
x = nonce[1]; y = nonce[0]; nonce[0]++; - speck_encrypt(&x, &y, ctx); + speck_encrypt(&x, &y, ctx, numrounds); ((u64 *)out)[1+t] = htole64(x ^ ((u64 *)in)[1+t]); ((u64 *)out)[0+t] = htole64(y ^ ((u64 *)in)[0+t]); t += 2; @@ -637,7 +671,7 @@ static int internal_speck_ctr (unsigned char *out, const unsigned char *in, unsi if(inlen > 0) { x = nonce[1]; y = nonce[0]; - speck_encrypt(&x, &y, ctx); + speck_encrypt(&x, &y, ctx, numrounds); ((u64 *)block)[1] = htole64(x); ((u64 *)block)[0] = htole64(y); for(i = 0; i < inlen; i++) out[i + 8*t] = block[i] ^ in[i + 8*t]; @@ -649,24 +683,33 @@ static int internal_speck_ctr (unsigned char *out, const unsigned char *in, unsi } -static int speck_expand_key (const unsigned char *k, speck_context_t *ctx) { +static int speck_expand_key (speck_context_t *ctx, const unsigned char *k, int keysize) { u64 K[4]; u64 i; - for(i = 0; i < 4; i++) - K[i] = htole64( ((u64 *)k)[i] ); + for(i = 0; i < (keysize >> 6); i++) + K[i] = htole64( ((u64 *)k)[i] ); for(i = 0; i < 33; i += 3) { ctx->key[i ] = K[0]; R(K[1], K[0], i ); - ctx->key[i+1] = K[0]; - R(K[2], K[0], i + 1); - ctx->key[i+2] = K[0]; - R(K[3], K[0], i + 2); + + if(keysize == 256) { + ctx->key[i+1] = K[0]; + R(K[2], K[0], i + 1); + ctx->key[i+2] = K[0]; + R(K[3], K[0], i + 2); + } else { + // counter the i += 3 to make the loop go one by one in this case + // we can afford the unused round keys 32 and 33 (128 bit key size uses 32 rounds only) + i -= 2; + } } ctx->key[33] = K[0]; + ctx->keysize = keysize; + return 1; } @@ -674,7 +717,7 @@ static int speck_expand_key (const unsigned char *k, speck_context_t *ctx) { #endif // AVX, SSE, NEON, plain C ------------------------------------------------------------------------ -// this functions wraps the call to internal speck_ctr functions which have slightly different +// this function wraps the call to the internal_speck_ctr functions which have slightly different // signature -- ctx by value for SSE with SPECK_CTX_BYVAL defined in speck.h, by name otherwise int speck_ctr (unsigned char *out, 
const unsigned char *in, unsigned long long inlen, const unsigned char *n, speck_context_t *ctx) { @@ -688,7 +731,8 @@ int speck_ctr (unsigned char *out, const unsigned char *in, unsigned long long i } -int speck_init (const unsigned char *k, speck_context_t **ctx) { +// create context loaded with round keys ready for use, key size either 128 or 256 (bits) +int speck_init (speck_context_t **ctx, const unsigned char *k, int keysize) { #if defined (SPECK_ALIGNED_CTX) *ctx = (speck_context_t*)_mm_malloc(sizeof(speck_context_t), SPECK_ALIGNED_CTX); @@ -699,7 +743,7 @@ int speck_init (const unsigned char *k, speck_context_t **ctx) { return -1; } - return speck_expand_key(k, *ctx); + return speck_expand_key(*ctx, k, keysize); } @@ -720,87 +764,6 @@ int speck_deinit (speck_context_t *ctx) { // ---------------------------------------------------------------------------------------------------------------- -// cipher SPECK -- 128 bit block size -- 128 bit key size -- CTR mode -// used for header encryption, thus the postfix '_he' -// for now: just plain C -- AVX, SSE, NEON do not make sense for short header - - -#define ROR64(x,r) (((x)>>(r))|((x)<<(64-(r)))) -#define ROL64(x,r) (((x)<<(r))|((x)>>(64-(r)))) -#define R64(x,y,k) (x=ROR64(x,8), x+=y, x^=k, y=ROL64(y,3), y^=x) - - -static int speck_encrypt_he (u64 *u, u64 *v, speck_context_t *ctx) { - - u64 i, x=*u, y=*v; - - for(i = 0; i < 32; i++) - R64(x, y, ctx->key[i]); - - *u = x; *v = y; - - return 0; -} - - -int speck_he (unsigned char *out, const unsigned char *in, unsigned long long inlen, - const unsigned char *n, speck_context_t *ctx) { - - u64 i, nonce[2], x, y, t; - unsigned char *block = malloc(16); - - if(!inlen) { - free(block); - return 0; - } - - nonce[0] = htole64 ( ((u64*)n)[0] ); - nonce[1] = htole64 ( ((u64*)n)[1] ); - - t = 0; - while(inlen >= 16) { - x = nonce[1]; y = nonce[0]; nonce[0]++; - speck_encrypt_he(&x, &y, ctx); - ((u64 *)out)[1+t] = htole64(x ^ ((u64 *)in)[1+t]); - ((u64 *)out)[0+t] = 
htole64(y ^ ((u64 *)in)[0+t]); - t += 2; - inlen -= 16; - } - - if(inlen > 0) { - x = nonce[1]; y = nonce[0]; - speck_encrypt_he(&x, &y, ctx); - ((u64 *)block)[1] = htole64(x); ((u64 *)block)[0] = htole64(y); - for(i = 0; i < inlen; i++) - out[i+8*t] = block[i] ^ in[i+8*t]; - } - - free(block); - - return 0; -} - - -int speck_expand_key_he (const unsigned char *k, speck_context_t *ctx) { - - u64 A, B; - u64 i; - - A = htole64( ((u64 *)k)[0] ); - B = htole64( ((u64 *)k)[1] ); - - for(i = 0; i < 32; i++) { - ctx->key[i] = A; - R64(B, A, i); - } - - return 1; -} - - -// ---------------------------------------------------------------------------------------------------------------- - - // cipher SPECK -- 96 bit block size -- 96 bit key size -- ECB mode // follows endianess rules as used in official implementation guide and NOT as in original 2013 cipher presentation // used for IV in header encryption, thus the in/postfix 'he_iv' @@ -814,7 +777,7 @@ int speck_expand_key_he (const unsigned char *k, speck_context_t *ctx) { #define DR96(x,y,k) (y^=x, y=ROTR48(y,3), x^=k, x-=y, x=ROTL48(x,8)) -int speck_he_iv_encrypt (unsigned char *inout, speck_context_t *ctx) { +int speck_96_encrypt (unsigned char *inout, speck_context_t *ctx) { u64 x, y; int i; @@ -835,7 +798,7 @@ int speck_he_iv_encrypt (unsigned char *inout, speck_context_t *ctx) { } -int speck_he_iv_decrypt (unsigned char *inout, speck_context_t *ctx) { +int speck_96_decrypt (unsigned char *inout, speck_context_t *ctx) { u64 x, y; int i; @@ -856,7 +819,7 @@ int speck_he_iv_decrypt (unsigned char *inout, speck_context_t *ctx) { } -int speck_expand_key_he_iv (const unsigned char *k, speck_context_t *ctx) { +int speck_96_expand_key (speck_context_t *ctx, const unsigned char *k) { u64 A, B; int i; diff --git a/src/transform_speck.c b/src/transform_speck.c index ecdedfe..9e0c223 100644 --- a/src/transform_speck.c +++ b/src/transform_speck.c @@ -134,8 +134,8 @@ static int setup_speck_key (transop_speck_t *priv, const 
uint8_t *key, ssize_t k // the input key always gets hashed to make a more unpredictable and more complete use of the key space pearson_hash_256(key_mat_buf, key, key_size); - // expand the key material to the context (= round keys) - speck_init(key_mat_buf, &(priv->ctx)); + // expand the key material to the context (= round keys), 256 bit keysize + speck_init(&(priv->ctx), key_mat_buf, 256); traceEvent(TRACE_DEBUG, "setup_speck_key completed\n");