readability code clean-up (#532)

This commit is contained in:
Logan oos Even 2020-12-19 17:11:32 +05:45 committed by GitHub
parent d063cf3088
commit 1c29fbee8a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 512 additions and 467 deletions

View File

@ -17,7 +17,7 @@
*/ */
// cipher SPECK -- 128 bit block size -- 256 bit key size // cipher SPECK -- 128 bit block size -- 256 bit key size -- CTR mode
// taken from (and modified: removed pure crypto-stream generation and separated key expansion) // taken from (and modified: removed pure crypto-stream generation and separated key expansion)
// https://github.com/nsacyber/simon-speck-supercop/blob/master/crypto_stream/speck128256ctr/ // https://github.com/nsacyber/simon-speck-supercop/blob/master/crypto_stream/speck128256ctr/
@ -25,10 +25,13 @@
#ifndef SPECK_H #ifndef SPECK_H
#define SPECK_H #define SPECK_H
#include <stdint.h> #include <stdint.h>
#include <stdlib.h> #include <stdlib.h>
#include "portable_endian.h" #include "portable_endian.h"
#define u32 uint32_t #define u32 uint32_t
#define u64 uint64_t #define u64 uint64_t
@ -36,45 +39,60 @@
#define SPECK_KEY_BYTES (256/8) #define SPECK_KEY_BYTES (256/8)
#if defined (__AVX2__) #if defined (__AVX2__) // AVX support -----------------------------------------------------------------------------
#include <immintrin.h> #include <immintrin.h>
#define SPECK_ALIGNED_CTX 32
#define u256 __m256i #define u256 __m256i
#define SPECK_ALIGNED_CTX 32
typedef struct { typedef struct {
u256 rk[34]; u256 rk[34];
u64 key[34]; u64 key[34];
} speck_context_t; } speck_context_t;
#elif defined (__SSE2__)
#elif defined (__SSE2__) // SSE support ---------------------------------------------------------------------------
#include <immintrin.h> #include <immintrin.h>
#define u128 __m128i
#define SPECK_ALIGNED_CTX 16 #define SPECK_ALIGNED_CTX 16
#define SPECK_CTX_BYVAL 1 #define SPECK_CTX_BYVAL 1
#define u128 __m128i
typedef struct { typedef struct {
u128 rk[34]; u128 rk[34];
u64 key[34]; u64 key[34];
} speck_context_t; } speck_context_t;
#elif defined (__ARM_NEON)
#elif defined (__ARM_NEON) // NEON support ------------------------------------------------------------------------
#include <arm_neon.h> #include <arm_neon.h>
#define u128 uint64x2_t #define u128 uint64x2_t
typedef struct { typedef struct {
u128 rk[34]; u128 rk[34];
u64 key[34]; u64 key[34];
} speck_context_t; } speck_context_t;
#else
#else // plain C --------------------------------------------------------------------------------------------------
typedef struct { typedef struct {
u64 key[34]; u64 key[34];
} speck_context_t; } speck_context_t;
#endif
// ----- #endif // ---------------------------------------------------------------------------------------------------------
int speck_ctr (unsigned char *out, const unsigned char *in, unsigned long long inlen, int speck_ctr (unsigned char *out, const unsigned char *in, unsigned long long inlen,
const unsigned char *n, const unsigned char *n,
@ -84,14 +102,29 @@ int speck_init (const unsigned char *k, speck_context_t **ctx);
int speck_deinit (speck_context_t *ctx); int speck_deinit (speck_context_t *ctx);
// -----
// ----------------------------------------------------------------------------------------------------------------
// cipher SPECK -- 128 bit block size -- 128 bit key size -- CTR mode
// used for header encryption, thus the postfix '_he'
// for now: just plain C -- AVX, SSE, NEON do not make sense for short header
int speck_he (unsigned char *out, const unsigned char *in, unsigned long long inlen, int speck_he (unsigned char *out, const unsigned char *in, unsigned long long inlen,
const unsigned char *n, speck_context_t *ctx); const unsigned char *n, speck_context_t *ctx);
int speck_expand_key_he (const unsigned char *k, speck_context_t *ctx); int speck_expand_key_he (const unsigned char *k, speck_context_t *ctx);
// -----
// ----------------------------------------------------------------------------------------------------------------
// cipher SPECK -- 96 bit block size -- 96 bit key size -- ECB mode
// follows endianness rules as used in official implementation guide and NOT as in original 2013 cipher presentation
// used for IV in header encryption, thus the in/postfix 'he_iv'
// for now: just plain C -- probably no need for AVX, SSE, NEON
int speck_he_iv_encrypt (unsigned char *inout, speck_context_t *ctx); int speck_he_iv_encrypt (unsigned char *inout, speck_context_t *ctx);

View File

@ -24,7 +24,8 @@
#include "speck.h" #include "speck.h"
#if defined (__AVX2__) // AVX support ----------------------------------------------------
#if defined (__AVX2__) // AVX support ----------------------------------------------------------------------------
#define LCS(x,r) (((x)<<r)|((x)>>(64-r))) #define LCS(x,r) (((x)<<r)|((x)>>(64-r)))
@ -99,58 +100,59 @@
RK(B,A,k,key,21), RK(C,A,k,key,22), RK(D,A,k,key,23), RK(B,A,k,key,24), RK(C,A,k,key,25), RK(D,A,k,key,26), RK(B,A,k,key,27), \ RK(B,A,k,key,21), RK(C,A,k,key,22), RK(D,A,k,key,23), RK(B,A,k,key,24), RK(C,A,k,key,25), RK(D,A,k,key,26), RK(B,A,k,key,27), \
RK(C,A,k,key,28), RK(D,A,k,key,29), RK(B,A,k,key,30), RK(C,A,k,key,31), RK(D,A,k,key,32), RK(B,A,k,key,33)) RK(C,A,k,key,28), RK(D,A,k,key,29), RK(B,A,k,key,30), RK(C,A,k,key,31), RK(D,A,k,key,32), RK(B,A,k,key,33))
static int speck_encrypt_xor(unsigned char *out, const unsigned char *in, u64 nonce[], speck_context_t *ctx, int numbytes) { static int speck_encrypt_xor(unsigned char *out, const unsigned char *in, u64 nonce[], speck_context_t *ctx, int numbytes) {
u64 x[2], y[2]; u64 x[2], y[2];
u256 X[4], Y[4], Z[4]; u256 X[4], Y[4], Z[4];
if (numbytes == 16) { if(numbytes == 16) {
x[0] = nonce[1]; y[0] = nonce[0]; nonce[0]++; x[0] = nonce[1]; y[0] = nonce[0]; nonce[0]++;
Encrypt (x, y, ctx->key, 1); Encrypt(x, y, ctx->key, 1);
((u64 *)out)[1] = x[0]; ((u64 *)out)[0] = y[0]; ((u64 *)out)[1] = x[0]; ((u64 *)out)[0] = y[0];
return 0; return 0;
} }
if (numbytes == 32) { if(numbytes == 32) {
x[0] = nonce[1]; y[0] = nonce[0]; nonce[0]++; x[0] = nonce[1]; y[0] = nonce[0]; nonce[0]++;
x[1] = nonce[1]; y[1] = nonce[0]; nonce[0]++; x[1] = nonce[1]; y[1] = nonce[0]; nonce[0]++;
Encrypt (x , y, ctx->key, 2); Encrypt(x , y, ctx->key, 2);
((u64 *)out)[1] = x[0] ^ ((u64 *)in)[1]; ((u64 *)out)[0] = y[0] ^ ((u64 *)in)[0]; ((u64 *)out)[1] = x[0] ^ ((u64 *)in)[1]; ((u64 *)out)[0] = y[0] ^ ((u64 *)in)[0];
((u64 *)out)[3] = x[1] ^ ((u64 *)in)[3]; ((u64 *)out)[2] = y[1] ^ ((u64 *)in)[2]; ((u64 *)out)[3] = x[1] ^ ((u64 *)in)[3]; ((u64 *)out)[2] = y[1] ^ ((u64 *)in)[2];
return 0; return 0;
} }
SET1 (X[0], nonce[1]); SET4 (Y[0], nonce[0]); SET1(X[0], nonce[1]); SET4(Y[0], nonce[0]);
if (numbytes == 64) if(numbytes == 64)
Encrypt (X, Y, ctx->rk, 4); Encrypt(X, Y, ctx->rk, 4);
else { else {
X[1] = X[0]; X[1] = X[0];
Y[1] = ADD (Y[0], _four); Y[1] = ADD(Y[0], _four);
if (numbytes == 128) if(numbytes == 128)
Encrypt (X, Y, ctx->rk, 8); Encrypt(X, Y, ctx->rk, 8);
else { else {
X[2] = X[0]; X[2] = X[0];
Y[2] = ADD (Y[1], _four); Y[2] = ADD(Y[1], _four);
if (numbytes == 192) if(numbytes == 192)
Encrypt (X, Y, ctx->rk, 12); Encrypt(X, Y, ctx->rk, 12);
else { else {
X[3] = X[0]; X[3] = X[0];
Y[3] = ADD (Y[2], _four); Y[3] = ADD(Y[2], _four);
Encrypt (X, Y, ctx->rk, 16); Encrypt(X, Y, ctx->rk, 16);
} }
} }
} }
nonce[0] += (numbytes>>4); nonce[0] += (numbytes >> 4);
XOR_STORE (in, out, X[0], Y[0]); XOR_STORE(in, out, X[0], Y[0]);
if (numbytes >= 128) if (numbytes >= 128)
XOR_STORE (in + 64, out + 64, X[1], Y[1]); XOR_STORE(in + 64, out + 64, X[1], Y[1]);
if (numbytes >= 192) if(numbytes >= 192)
XOR_STORE (in + 128, out + 128, X[2], Y[2]); XOR_STORE(in + 128, out + 128, X[2], Y[2]);
if (numbytes >= 256) if(numbytes >= 256)
XOR_STORE (in + 192, out + 192, X[3], Y[3]); XOR_STORE(in + 192, out + 192, X[3], Y[3]);
return 0; return 0;
} }
@ -170,41 +172,41 @@ static int internal_speck_ctr(unsigned char *out, const unsigned char *in, unsig
nonce[0] = ((u64 *)n)[0]; nonce[0] = ((u64 *)n)[0];
nonce[1] = ((u64 *)n)[1]; nonce[1] = ((u64 *)n)[1];
while (inlen >= 256) { while(inlen >= 256) {
speck_encrypt_xor (out, in, nonce, ctx, 256); speck_encrypt_xor(out, in, nonce, ctx, 256);
in += 256; inlen -= 256; out += 256; in += 256; inlen -= 256; out += 256;
} }
if (inlen >= 192) { if(inlen >= 192) {
speck_encrypt_xor (out, in, nonce, ctx, 192); speck_encrypt_xor(out, in, nonce, ctx, 192);
in += 192; inlen -= 192; out += 192; in += 192; inlen -= 192; out += 192;
} }
if (inlen >= 128) { if(inlen >= 128) {
speck_encrypt_xor (out, in, nonce, ctx, 128); speck_encrypt_xor(out, in, nonce, ctx, 128);
in += 128; inlen -= 128; out += 128; in += 128; inlen -= 128; out += 128;
} }
if (inlen >= 64) { if(inlen >= 64) {
speck_encrypt_xor (out, in, nonce, ctx, 64); speck_encrypt_xor(out, in, nonce, ctx, 64);
in += 64; inlen -= 64; out += 64; in += 64; inlen -= 64; out += 64;
} }
if (inlen >= 32) { if(inlen >= 32) {
speck_encrypt_xor (out, in, nonce, ctx, 32); speck_encrypt_xor(out, in, nonce, ctx, 32);
in += 32; inlen -= 32; out += 32; in += 32; inlen -= 32; out += 32;
} }
if (inlen >= 16) { if(inlen >= 16) {
speck_encrypt_xor (block, in, nonce, ctx, 16); speck_encrypt_xor(block, in, nonce, ctx, 16);
((u64 *)out)[0] = block64[0] ^ ((u64 *)in)[0]; ((u64 *)out)[0] = block64[0] ^ ((u64 *)in)[0];
((u64 *)out)[1] = block64[1] ^ ((u64 *)in)[1]; ((u64 *)out)[1] = block64[1] ^ ((u64 *)in)[1];
in += 16; inlen -= 16; out += 16; in += 16; inlen -= 16; out += 16;
} }
if (inlen > 0) { if(inlen > 0) {
speck_encrypt_xor (block, in, nonce, ctx, 16); speck_encrypt_xor (block, in, nonce, ctx, 16);
for (i = 0; i < inlen; i++) for(i = 0; i < inlen; i++)
out[i] = block[i] ^ in[i]; out[i] = block[i] ^ in[i];
} }
@ -217,16 +219,16 @@ static int speck_expand_key (const unsigned char *k, speck_context_t *ctx) {
u64 K[4]; u64 K[4];
size_t i; size_t i;
for (i = 0; i < numkeywords; i++) for(i = 0; i < numkeywords; i++)
K[i] = ((u64 *)k)[i]; K[i] = ((u64 *)k)[i];
EK (K[0], K[1], K[2], K[3], ctx->rk, ctx->key); EK(K[0], K[1], K[2], K[3], ctx->rk, ctx->key);
return 0; return 0;
} }
#elif defined (__SSE2__) // SSE support ------------------------------------------------------------ #elif defined (__SSE2__) // SSE support ---------------------------------------------------------------------------
#define LCS(x,r) (((x)<<r)|((x)>>(64-r))) #define LCS(x,r) (((x)<<r)|((x)>>(64-r)))
@ -311,45 +313,46 @@ static int speck_encrypt_xor (unsigned char *out, const unsigned char *in, u64 n
u64 x[2], y[2]; u64 x[2], y[2];
u128 X[4], Y[4], Z[4]; u128 X[4], Y[4], Z[4];
if (numbytes == 16) { if(numbytes == 16) {
x[0] = nonce[1]; y[0] = nonce[0]; nonce[0]++; x[0] = nonce[1]; y[0] = nonce[0]; nonce[0]++;
Encrypt (x, y, ctx.key, 1); Encrypt(x, y, ctx.key, 1);
((u64 *)out)[1] = x[0]; ((u64 *)out)[0] = y[0]; ((u64 *)out)[1] = x[0]; ((u64 *)out)[0] = y[0];
return 0; return 0;
} }
SET1 (X[0], nonce[1]); SET2 (Y[0], nonce[0]); SET1(X[0], nonce[1]); SET2 (Y[0], nonce[0]);
if (numbytes == 32) if(numbytes == 32)
Encrypt (X, Y, ctx.rk, 2); Encrypt(X, Y, ctx.rk, 2);
else { else {
X[1] = X[0]; Y[1] = ADD (Y[0], _two); X[1] = X[0]; Y[1] = ADD(Y[0], _two);
if (numbytes == 64) if(numbytes == 64)
Encrypt (X, Y, ctx.rk, 4); Encrypt(X, Y, ctx.rk, 4);
else { else {
X[2] = X[0]; Y[2] = ADD (Y[1], _two); X[2] = X[0]; Y[2] = ADD(Y[1], _two);
if (numbytes == 96) if(numbytes == 96)
Encrypt (X, Y, ctx.rk, 6); Encrypt(X, Y, ctx.rk, 6);
else { else {
X[3] = X[0]; Y[3] = ADD (Y[2], _two); X[3] = X[0]; Y[3] = ADD(Y[2], _two);
Encrypt (X, Y, ctx.rk, 8); Encrypt(X, Y, ctx.rk, 8);
} }
} }
} }
nonce[0] += (numbytes>>4); nonce[0] += (numbytes >> 4);
XOR_STORE (in, out, X[0], Y[0]); XOR_STORE(in, out, X[0], Y[0]);
if (numbytes >= 64) if(numbytes >= 64)
XOR_STORE (in + 32, out + 32, X[1], Y[1]); XOR_STORE(in + 32, out + 32, X[1], Y[1]);
if (numbytes >= 96) if(numbytes >= 96)
XOR_STORE (in + 64, out + 64, X[2], Y[2]); XOR_STORE(in + 64, out + 64, X[2], Y[2]);
if (numbytes >= 128) if(numbytes >= 128)
XOR_STORE (in + 96, out + 96, X[3], Y[3]); XOR_STORE(in + 96, out + 96, X[3], Y[3]);
return 0; return 0;
} }
// attention: ctx is provided by value as it is faster in this case, astonishingly // attention: ctx is provided by value as it is faster in this case, astonishingly
static int internal_speck_ctr (unsigned char *out, const unsigned char *in, unsigned long long inlen, static int internal_speck_ctr (unsigned char *out, const unsigned char *in, unsigned long long inlen,
const unsigned char *n, const speck_context_t ctx) { const unsigned char *n, const speck_context_t ctx) {
@ -359,42 +362,42 @@ static int internal_speck_ctr (unsigned char *out, const unsigned char *in, unsi
unsigned char block[16]; unsigned char block[16];
u64 * const block64 = (u64 *)block; u64 * const block64 = (u64 *)block;
if (!inlen) if(!inlen)
return 0; return 0;
nonce[0] = ((u64 *)n)[0]; nonce[0] = ((u64 *)n)[0];
nonce[1] = ((u64 *)n)[1]; nonce[1] = ((u64 *)n)[1];
while (inlen >= 128) { while(inlen >= 128) {
speck_encrypt_xor (out, in, nonce, ctx, 128); speck_encrypt_xor(out, in, nonce, ctx, 128);
in += 128; inlen -= 128; out += 128; in += 128; inlen -= 128; out += 128;
} }
if (inlen >= 96) { if(inlen >= 96) {
speck_encrypt_xor (out, in, nonce, ctx, 96); speck_encrypt_xor(out, in, nonce, ctx, 96);
in += 96; inlen -= 96; out += 96; in += 96; inlen -= 96; out += 96;
} }
if (inlen >= 64) { if(inlen >= 64) {
speck_encrypt_xor (out, in, nonce, ctx, 64); speck_encrypt_xor(out, in, nonce, ctx, 64);
in += 64; inlen -= 64; out += 64; in += 64; inlen -= 64; out += 64;
} }
if (inlen >= 32) { if(inlen >= 32) {
speck_encrypt_xor (out, in, nonce, ctx, 32); speck_encrypt_xor(out, in, nonce, ctx, 32);
in += 32; inlen -= 32; out += 32; in += 32; inlen -= 32; out += 32;
} }
if (inlen >= 16) { if(inlen >= 16) {
speck_encrypt_xor (block, in, nonce, ctx, 16); speck_encrypt_xor(block, in, nonce, ctx, 16);
((u64 *)out)[0] = block64[0] ^ ((u64 *)in)[0]; ((u64 *)out)[0] = block64[0] ^ ((u64 *)in)[0];
((u64 *)out)[1] = block64[1] ^ ((u64 *)in)[1]; ((u64 *)out)[1] = block64[1] ^ ((u64 *)in)[1];
in += 16; inlen -= 16; out += 16; in += 16; inlen -= 16; out += 16;
} }
if (inlen > 0) { if(inlen > 0) {
speck_encrypt_xor (block, in, nonce, ctx, 16); speck_encrypt_xor (block, in, nonce, ctx, 16);
for (i = 0; i < inlen; i++) for(i = 0; i < inlen; i++)
out[i] = block[i] ^ in[i]; out[i] = block[i] ^ in[i];
} }
@ -407,16 +410,16 @@ static int speck_expand_key (const unsigned char *k, speck_context_t *ctx) {
u64 K[4]; u64 K[4];
size_t i; size_t i;
for (i = 0; i < numkeywords; i++) for(i = 0; i < numkeywords; i++)
K[i] = ((u64 *)k)[i]; K[i] = ((u64 *)k)[i];
EK (K[0], K[1], K[2], K[3], ctx->rk, ctx->key); EK(K[0], K[1], K[2], K[3], ctx->rk, ctx->key);
return 0; return 0;
} }
#elif defined (__ARM_NEON) // NEON support ------------------------------------------- #elif defined (__ARM_NEON) // NEON support -------------------------------------------------------------------
#define LCS(x,r) (((x)<<r)|((x)>>(64-r))) #define LCS(x,r) (((x)<<r)|((x)>>(64-r)))
@ -485,39 +488,39 @@ static int speck_encrypt_xor (unsigned char *out, const unsigned char *in, u64 n
u64 x[2], y[2]; u64 x[2], y[2];
u128 X[4], Y[4], Z[4]; u128 X[4], Y[4], Z[4];
if (numbytes == 16) { if(numbytes == 16) {
x[0] = nonce[1]; y[0]=nonce[0]; nonce[0]++; x[0] = nonce[1]; y[0]=nonce[0]; nonce[0]++;
Encrypt (x, y, ctx->key, 1); Encrypt(x, y, ctx->key, 1);
((u64 *)out)[1] = x[0]; ((u64 *)out)[0] = y[0]; ((u64 *)out)[1] = x[0]; ((u64 *)out)[0] = y[0];
return 0; return 0;
} }
SET1 (X[0], nonce[1]); SET2 (Y[0], nonce[0]); SET1(X[0], nonce[1]); SET2(Y[0], nonce[0]);
if (numbytes == 32) if(numbytes == 32)
Encrypt (X, Y, ctx->rk, 2); Encrypt(X, Y, ctx->rk, 2);
else { else {
X[1] = X[0]; SET2 (Y[1], nonce[0]); X[1] = X[0]; SET2(Y[1], nonce[0]);
if (numbytes == 64) if(numbytes == 64)
Encrypt (X, Y, ctx->rk, 4); Encrypt(X, Y, ctx->rk, 4);
else { else {
X[2] = X[0]; SET2 (Y[2], nonce[0]); X[2] = X[0]; SET2(Y[2], nonce[0]);
if (numbytes == 96) if(numbytes == 96)
Encrypt (X, Y, ctx->rk, 6); Encrypt(X, Y, ctx->rk, 6);
else { else {
X[3] = X[0]; SET2 (Y[3], nonce[0]); X[3] = X[0]; SET2(Y[3], nonce[0]);
Encrypt (X, Y, ctx->rk, 8); Encrypt(X, Y, ctx->rk, 8);
} }
} }
} }
XOR_STORE (in, out, X[0], Y[0]); XOR_STORE(in, out, X[0], Y[0]);
if (numbytes >= 64) if(numbytes >= 64)
XOR_STORE (in + 32, out + 32, X[1], Y[1]); XOR_STORE(in + 32, out + 32, X[1], Y[1]);
if (numbytes >= 96) if(numbytes >= 96)
XOR_STORE (in + 64, out + 64, X[2], Y[2]); XOR_STORE(in + 64, out + 64, X[2], Y[2]);
if (numbytes >= 128) if(numbytes >= 128)
XOR_STORE (in + 96, out + 96, X[3], Y[3]); XOR_STORE(in + 96, out + 96, X[3], Y[3]);
return 0; return 0;
} }
@ -531,42 +534,42 @@ static int internal_speck_ctr (unsigned char *out, const unsigned char *in, unsi
unsigned char block[16]; unsigned char block[16];
u64 *const block64 = (u64 *)block; u64 *const block64 = (u64 *)block;
if (!inlen) if(!inlen)
return 0; return 0;
nonce[0] = ((u64 *)n)[0]; nonce[0] = ((u64 *)n)[0];
nonce[1] = ((u64 *)n)[1]; nonce[1] = ((u64 *)n)[1];
while (inlen >= 128) { while(inlen >= 128) {
speck_encrypt_xor (out, in, nonce, ctx, 128); speck_encrypt_xor(out, in, nonce, ctx, 128);
in += 128; inlen -= 128; out += 128; in += 128; inlen -= 128; out += 128;
} }
if (inlen >= 96) { if(inlen >= 96) {
speck_encrypt_xor (out, in, nonce, ctx, 96); speck_encrypt_xor(out, in, nonce, ctx, 96);
in += 96; inlen -= 96; out += 96; in += 96; inlen -= 96; out += 96;
} }
if (inlen >= 64) { if(inlen >= 64) {
speck_encrypt_xor (out, in, nonce, ctx, 64); speck_encrypt_xor(out, in, nonce, ctx, 64);
in += 64; inlen -= 64; out += 64; in += 64; inlen -= 64; out += 64;
} }
if (inlen >= 32) { if(inlen >= 32) {
speck_encrypt_xor (out, in, nonce, ctx, 32); speck_encrypt_xor(out, in, nonce, ctx, 32);
in += 32; inlen -= 32; out += 32; in += 32; inlen -= 32; out += 32;
} }
if (inlen >= 16) { if(inlen >= 16) {
speck_encrypt_xor (block, in, nonce, ctx, 16); speck_encrypt_xor(block, in, nonce, ctx, 16);
((u64 *)out)[0] = block64[0] ^ ((u64 *)in)[0]; ((u64 *)out)[0] = block64[0] ^ ((u64 *)in)[0];
((u64 *)out)[1] = block64[1] ^ ((u64 *)in)[1]; ((u64 *)out)[1] = block64[1] ^ ((u64 *)in)[1];
in += 16; inlen -= 16; out += 16; in += 16; inlen -= 16; out += 16;
} }
if (inlen > 0) { if(inlen > 0) {
speck_encrypt_xor (block, in, nonce, ctx, 16); speck_encrypt_xor(block, in, nonce, ctx, 16);
for (i = 0; i < inlen; i++) for(i = 0; i < inlen; i++)
out[i] = block[i] ^ in[i]; out[i] = block[i] ^ in[i];
} }
@ -579,16 +582,16 @@ static int speck_expand_key (const unsigned char *k, speck_context_t *ctx) {
u64 K[4]; u64 K[4];
size_t i; size_t i;
for (i = 0; i < numkeywords; i++) for(i = 0; i < numkeywords; i++)
K[i] = ((u64 *)k)[i]; K[i] = ((u64 *)k)[i];
EK (K[0], K[1], K[2], K[3], ctx->rk, ctx->key); EK(K[0], K[1], K[2], K[3], ctx->rk, ctx->key);
return 0; return 0;
} }
#else // plain C ---------------------------------------------------------------- #else // plain C ----------------------------------------------------------------------------------------
#define ROR(x,r) (((x)>>(r))|((x)<<(64-(r)))) #define ROR(x,r) (((x)>>(r))|((x)<<(64-(r))))
@ -600,8 +603,8 @@ static int speck_encrypt (u64 *u, u64 *v, speck_context_t *ctx) {
u64 i, x = *u, y = *v; u64 i, x = *u, y = *v;
for (i = 0; i < 34; i++) for(i = 0; i < 34; i++)
R (x, y, ctx->key[i]); R(x, y, ctx->key[i]);
*u = x; *v = y; *u = x; *v = y;
@ -613,33 +616,35 @@ static int internal_speck_ctr (unsigned char *out, const unsigned char *in, unsi
const unsigned char *n, speck_context_t *ctx) { const unsigned char *n, speck_context_t *ctx) {
u64 i, nonce[2], x, y, t; u64 i, nonce[2], x, y, t;
unsigned char *block = malloc (16); unsigned char *block = malloc(16);
if (!inlen) { if(!inlen) {
free (block); free(block);
return 0; return 0;
} }
nonce[0] = htole64 ( ((u64*)n)[0] ); nonce[0] = htole64( ((u64*)n)[0] );
nonce[1] = htole64 ( ((u64*)n)[1] ); nonce[1] = htole64( ((u64*)n)[1] );
t=0; t=0;
while (inlen >= 16) { while(inlen >= 16) {
x = nonce[1]; y = nonce[0]; nonce[0]++; x = nonce[1]; y = nonce[0]; nonce[0]++;
speck_encrypt (&x, &y, ctx); speck_encrypt(&x, &y, ctx);
((u64 *)out)[1+t] = htole64 (x ^ ((u64 *)in)[1+t]); ((u64 *)out)[1+t] = htole64(x ^ ((u64 *)in)[1+t]);
((u64 *)out)[0+t] = htole64 (y ^ ((u64 *)in)[0+t]); ((u64 *)out)[0+t] = htole64(y ^ ((u64 *)in)[0+t]);
t += 2; t += 2;
inlen -= 16; inlen -= 16;
} }
if (inlen > 0) {
if(inlen > 0) {
x = nonce[1]; y = nonce[0]; x = nonce[1]; y = nonce[0];
speck_encrypt (&x, &y, ctx); speck_encrypt(&x, &y, ctx);
((u64 *)block)[1] = htole64 (x); ((u64 *)block)[0] = htole64 (y); ((u64 *)block)[1] = htole64(x); ((u64 *)block)[0] = htole64(y);
for (i = 0; i < inlen; i++) for(i = 0; i < inlen; i++)
out[i + 8*t] = block[i] ^ in[i + 8*t]; out[i + 8*t] = block[i] ^ in[i + 8*t];
} }
free (block); free(block);
return 0; return 0;
} }
@ -649,23 +654,24 @@ static int speck_expand_key (const unsigned char *k, speck_context_t *ctx) {
u64 K[4]; u64 K[4];
u64 i; u64 i;
for (i = 0; i < 4; i++) for(i = 0; i < 4; i++)
K[i] = htole64 ( ((u64 *)k)[i] ); K[i] = htole64( ((u64 *)k)[i] );
for (i = 0; i < 33; i += 3) { for(i = 0; i < 33; i += 3) {
ctx->key[i ] = K[0]; ctx->key[i ] = K[0];
R (K[1], K[0], i ); R(K[1], K[0], i );
ctx->key[i+1] = K[0]; ctx->key[i+1] = K[0];
R (K[2], K[0], i + 1); R(K[2], K[0], i + 1);
ctx->key[i+2] = K[0]; ctx->key[i+2] = K[0];
R (K[3], K[0], i + 2); R(K[3], K[0], i + 2);
} }
ctx->key[33] = K[0]; ctx->key[33] = K[0];
return 1; return 1;
} }
#endif // AVX, SSE, NEON, plain C #endif // AVX, SSE, NEON, plain C ------------------------------------------------------------------------
// this function wraps the call to internal speck_ctr functions which have slightly different // this function wraps the call to internal speck_ctr functions which have slightly different
@ -673,7 +679,7 @@ static int speck_expand_key (const unsigned char *k, speck_context_t *ctx) {
int speck_ctr (unsigned char *out, const unsigned char *in, unsigned long long inlen, int speck_ctr (unsigned char *out, const unsigned char *in, unsigned long long inlen,
const unsigned char *n, speck_context_t *ctx) { const unsigned char *n, speck_context_t *ctx) {
return internal_speck_ctr (out, in, inlen, n, return internal_speck_ctr(out, in, inlen, n,
#if defined (SPECK_CTX_BYVAL) #if defined (SPECK_CTX_BYVAL)
*ctx); *ctx);
#else #else
@ -685,9 +691,9 @@ int speck_ctr (unsigned char *out, const unsigned char *in, unsigned long long i
int speck_init (const unsigned char *k, speck_context_t **ctx) { int speck_init (const unsigned char *k, speck_context_t **ctx) {
#if defined (SPECK_ALIGNED_CTX) #if defined (SPECK_ALIGNED_CTX)
*ctx = (speck_context_t*) _mm_malloc (sizeof(speck_context_t), SPECK_ALIGNED_CTX); *ctx = (speck_context_t*)_mm_malloc(sizeof(speck_context_t), SPECK_ALIGNED_CTX);
#else #else
*ctx = (speck_context_t*) calloc (1, sizeof(speck_context_t)); *ctx = (speck_context_t*)calloc(1, sizeof(speck_context_t));
#endif #endif
if(!(*ctx)) { if(!(*ctx)) {
return -1; return -1;
@ -701,22 +707,24 @@ int speck_deinit (speck_context_t *ctx) {
if(ctx) { if(ctx) {
#if defined (SPECK_ALIGNED_CTX) #if defined (SPECK_ALIGNED_CTX)
_mm_free (ctx); _mm_free(ctx);
#else #else
free (ctx); free(ctx);
#endif #endif
} }
return 0; return 0;
} }
// ---------------------------------------------------------------------------------------- // ----------------------------------------------------------------------------------------------------------------
// cipher SPECK -- 128 bit block size -- 128 bit key size -- CTR mode // cipher SPECK -- 128 bit block size -- 128 bit key size -- CTR mode
// used for header encryption, thus the prefix 'he_' // used for header encryption, thus the postfix '_he'
// for now: just plain C -- AVX, SSE, NEON do not make sense for short header // for now: just plain C -- AVX, SSE, NEON do not make sense for short header
#define ROR64(x,r) (((x)>>(r))|((x)<<(64-(r)))) #define ROR64(x,r) (((x)>>(r))|((x)<<(64-(r))))
#define ROL64(x,r) (((x)<<(r))|((x)>>(64-(r)))) #define ROL64(x,r) (((x)<<(r))|((x)>>(64-(r))))
#define R64(x,y,k) (x=ROR64(x,8), x+=y, x^=k, y=ROL64(y,3), y^=x) #define R64(x,y,k) (x=ROR64(x,8), x+=y, x^=k, y=ROL64(y,3), y^=x)
@ -726,8 +734,8 @@ static int speck_encrypt_he (u64 *u, u64 *v, speck_context_t *ctx) {
u64 i, x=*u, y=*v; u64 i, x=*u, y=*v;
for (i = 0; i < 32; i++) for(i = 0; i < 32; i++)
R64 (x, y, ctx->key[i]); R64(x, y, ctx->key[i]);
*u = x; *v = y; *u = x; *v = y;
@ -741,32 +749,34 @@ int speck_he (unsigned char *out, const unsigned char *in, unsigned long long in
u64 i, nonce[2], x, y, t; u64 i, nonce[2], x, y, t;
unsigned char *block = malloc(16); unsigned char *block = malloc(16);
if (!inlen) { if(!inlen) {
free (block); free(block);
return 0; return 0;
} }
nonce[0] = htole64 ( ((u64*)n)[0] ); nonce[0] = htole64 ( ((u64*)n)[0] );
nonce[1] = htole64 ( ((u64*)n)[1] ); nonce[1] = htole64 ( ((u64*)n)[1] );
t = 0; t = 0;
while (inlen >= 16) { while(inlen >= 16) {
x = nonce[1]; y = nonce[0]; nonce[0]++; x = nonce[1]; y = nonce[0]; nonce[0]++;
speck_encrypt_he (&x, &y, ctx); speck_encrypt_he(&x, &y, ctx);
((u64 *)out)[1+t] = htole64 (x ^ ((u64 *)in)[1+t]); ((u64 *)out)[1+t] = htole64(x ^ ((u64 *)in)[1+t]);
((u64 *)out)[0+t] = htole64 (y ^ ((u64 *)in)[0+t]); ((u64 *)out)[0+t] = htole64(y ^ ((u64 *)in)[0+t]);
t += 2; t += 2;
inlen -= 16; inlen -= 16;
} }
if (inlen > 0) { if(inlen > 0) {
x = nonce[1]; y = nonce[0]; x = nonce[1]; y = nonce[0];
speck_encrypt_he (&x, &y, ctx); speck_encrypt_he(&x, &y, ctx);
((u64 *)block)[1] = htole64 (x); ((u64 *)block)[0] = htole64 (y); ((u64 *)block)[1] = htole64(x); ((u64 *)block)[0] = htole64(y);
for (i = 0; i < inlen; i++) for(i = 0; i < inlen; i++)
out[i+8*t] = block[i] ^ in[i+8*t]; out[i+8*t] = block[i] ^ in[i+8*t];
} }
free(block); free(block);
return 0; return 0;
} }
@ -776,25 +786,27 @@ int speck_expand_key_he (const unsigned char *k, speck_context_t *ctx) {
u64 A, B; u64 A, B;
u64 i; u64 i;
A = htole64 ( ((u64 *)k)[0] ); A = htole64( ((u64 *)k)[0] );
B = htole64 ( ((u64 *)k)[1] ); B = htole64( ((u64 *)k)[1] );
for (i = 0; i < 32; i++) { for(i = 0; i < 32; i++) {
ctx->key[i] = A; ctx->key[i] = A;
R64 ( B, A, i); R64(B, A, i);
} }
return 1; return 1;
} }
// ---------------------------------------------------------------------------------------- // ----------------------------------------------------------------------------------------------------------------
// cipher SPECK -- 96 bit block size -- 96 bit key size -- ECB mode // cipher SPECK -- 96 bit block size -- 96 bit key size -- ECB mode
// follows endianness rules as used in official implementation guide and NOT as in original 2013 cipher presentation // follows endianness rules as used in official implementation guide and NOT as in original 2013 cipher presentation
// used for IV in header encryption, thus the prefix 'he_iv_' // used for IV in header encryption, thus the in/postfix 'he_iv'
// for now: just plain C -- probably no need for AVX, SSE, NEON // for now: just plain C -- probably no need for AVX, SSE, NEON
// prerequisite: lower 16 bit reset // prerequisite: lower 16 bit reset
#define ROTL48(x,r) (((((x)<<(r)) | (x>>(48-(r)))) >> 16) << 16) #define ROTL48(x,r) (((((x)<<(r)) | (x>>(48-(r)))) >> 16) << 16)
#define ROTR48(x,r) (((((x)>>(r)) | ((x)<<(48-(r)))) >> 16) << 16) #define ROTR48(x,r) (((((x)>>(r)) | ((x)<<(48-(r)))) >> 16) << 16)
@ -807,17 +819,17 @@ int speck_he_iv_encrypt (unsigned char *inout, speck_context_t *ctx) {
u64 x, y; u64 x, y;
int i; int i;
x = htole64 ( *(u64*)&inout[0] ); x <<= 16; x = htole64( *(u64*)&inout[0] ); x <<= 16;
y = htole64 ( *(u64*)&inout[4] ); y >>= 16; y <<= 16; y = htole64( *(u64*)&inout[4] ); y >>= 16; y <<= 16;
for (i = 0; i < 28; i++) for(i = 0; i < 28; i++)
ER96 (y, x, ctx->key[i]); ER96(y, x, ctx->key[i]);
x >>= 16; x |= y << 32; x >>= 16; x |= y << 32;
y >>= 32; y >>= 32;
((u64*)inout)[0] = le64toh (x); ((u64*)inout)[0] = le64toh(x);
((u32*)inout)[2] = le32toh (y); ((u32*)inout)[2] = le32toh(y);
return 0; return 0;
} }
@ -828,17 +840,17 @@ int speck_he_iv_decrypt (unsigned char *inout, speck_context_t *ctx) {
u64 x, y; u64 x, y;
int i; int i;
x = htole64 ( *(u64*)&inout[0] ); x <<= 16; x = htole64( *(u64*)&inout[0] ); x <<= 16;
y = htole64 ( *(u64*)&inout[4] ); y >>= 16; y <<= 16; y = htole64( *(u64*)&inout[4] ); y >>= 16; y <<= 16;
for (i = 27; i >= 0; i--) for(i = 27; i >= 0; i--)
DR96 (y, x, ctx->key[i]); DR96(y, x, ctx->key[i]);
x >>= 16; x |= y << 32; x >>= 16; x |= y << 32;
y >>= 32; y >>= 32;
((u64*)inout)[0] = le64toh (x); ((u64*)inout)[0] = le64toh(x);
((u32*)inout)[2] = le32toh (y); ((u32*)inout)[2] = le32toh(y);
return 0; return 0;
} }
@ -849,12 +861,12 @@ int speck_expand_key_he_iv (const unsigned char *k, speck_context_t *ctx) {
u64 A, B; u64 A, B;
int i; int i;
A = htole64 ( *(u64 *)&k[0] ); A <<= 16; A = htole64( *(u64 *)&k[0] ); A <<= 16;
B = htole64 ( *(u64 *)&k[4] ); B >>= 16; B <<= 16; B = htole64( *(u64 *)&k[4] ); B >>= 16; B <<= 16;
for (i = 0; i < 28; i++) { for(i = 0; i < 28; i++) {
ctx->key[i] = A; ctx->key[i] = A;
ER96 ( B, A, i << 16); ER96(B, A, i << 16);
} }
return 1; return 1;