n2n/src/tf.c
2020-08-29 22:38:27 +05:45

514 lines
21 KiB
C

/**
* (C) 2007-20 - ntop.org and contributors
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not see see <http://www.gnu.org/licenses/>
*
*/
// taken (and modified) from github/fudanchii/twofish as of August 2020
// which itself is a modified copy of Andrew T. Csillag's implementation
// published on github/drewcsillag/twofish
/*
The MIT License (MIT)
Copyright (c) 2015 Andrew T. Csillag
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include "tf.h"
const uint8_t RS[4][8] = { { 0x01, 0xA4, 0x55, 0x87, 0x5A, 0x58, 0xDB, 0x9E, },
{ 0xA4, 0x56, 0x82, 0xF3, 0x1E, 0xC6, 0x68, 0xE5, },
{ 0x02, 0xA1, 0xFC, 0xC1, 0x47, 0xAE, 0x3D, 0x19, },
{ 0xA4, 0x55, 0x87, 0x5A, 0x58, 0xDB, 0x9E, 0x03 } };
const uint8_t Q0[] = { 0xA9, 0x67, 0xB3, 0xE8, 0x04, 0xFD, 0xA3, 0x76, 0x9A, 0x92, 0x80, 0x78, 0xE4, 0xDD, 0xD1, 0x38,
0x0D, 0xC6, 0x35, 0x98, 0x18, 0xF7, 0xEC, 0x6C, 0x43, 0x75, 0x37, 0x26, 0xFA, 0x13, 0x94, 0x48,
0xF2, 0xD0, 0x8B, 0x30, 0x84, 0x54, 0xDF, 0x23, 0x19, 0x5B, 0x3D, 0x59, 0xF3, 0xAE, 0xA2, 0x82,
0x63, 0x01, 0x83, 0x2E, 0xD9, 0x51, 0x9B, 0x7C, 0xA6, 0xEB, 0xA5, 0xBE, 0x16, 0x0C, 0xE3, 0x61,
0xC0, 0x8C, 0x3A, 0xF5, 0x73, 0x2C, 0x25, 0x0B, 0xBB, 0x4E, 0x89, 0x6B, 0x53, 0x6A, 0xB4, 0xF1,
0xE1, 0xE6, 0xBD, 0x45, 0xE2, 0xF4, 0xB6, 0x66, 0xCC, 0x95, 0x03, 0x56, 0xD4, 0x1C, 0x1E, 0xD7,
0xFB, 0xC3, 0x8E, 0xB5, 0xE9, 0xCF, 0xBF, 0xBA, 0xEA, 0x77, 0x39, 0xAF, 0x33, 0xC9, 0x62, 0x71,
0x81, 0x79, 0x09, 0xAD, 0x24, 0xCD, 0xF9, 0xD8, 0xE5, 0xC5, 0xB9, 0x4D, 0x44, 0x08, 0x86, 0xE7,
0xA1, 0x1D, 0xAA, 0xED, 0x06, 0x70, 0xB2, 0xD2, 0x41, 0x7B, 0xA0, 0x11, 0x31, 0xC2, 0x27, 0x90,
0x20, 0xF6, 0x60, 0xFF, 0x96, 0x5C, 0xB1, 0xAB, 0x9E, 0x9C, 0x52, 0x1B, 0x5F, 0x93, 0x0A, 0xEF,
0x91, 0x85, 0x49, 0xEE, 0x2D, 0x4F, 0x8F, 0x3B, 0x47, 0x87, 0x6D, 0x46, 0xD6, 0x3E, 0x69, 0x64,
0x2A, 0xCE, 0xCB, 0x2F, 0xFC, 0x97, 0x05, 0x7A, 0xAC, 0x7F, 0xD5, 0x1A, 0x4B, 0x0E, 0xA7, 0x5A,
0x28, 0x14, 0x3F, 0x29, 0x88, 0x3C, 0x4C, 0x02, 0xB8, 0xDA, 0xB0, 0x17, 0x55, 0x1F, 0x8A, 0x7D,
0x57, 0xC7, 0x8D, 0x74, 0xB7, 0xC4, 0x9F, 0x72, 0x7E, 0x15, 0x22, 0x12, 0x58, 0x07, 0x99, 0x34,
0x6E, 0x50, 0xDE, 0x68, 0x65, 0xBC, 0xDB, 0xF8, 0xC8, 0xA8, 0x2B, 0x40, 0xDC, 0xFE, 0x32, 0xA4,
0xCA, 0x10, 0x21, 0xF0, 0xD3, 0x5D, 0x0F, 0x00, 0x6F, 0x9D, 0x36, 0x42, 0x4A, 0x5E, 0xC1, 0xE0 };
const uint8_t Q1[] = { 0x75, 0xF3, 0xC6, 0xF4, 0xDB, 0x7B, 0xFB, 0xC8, 0x4A, 0xD3, 0xE6, 0x6B, 0x45, 0x7D, 0xE8, 0x4B,
0xD6, 0x32, 0xD8, 0xFD, 0x37, 0x71, 0xF1, 0xE1, 0x30, 0x0F, 0xF8, 0x1B, 0x87, 0xFA, 0x06, 0x3F,
0x5E, 0xBA, 0xAE, 0x5B, 0x8A, 0x00, 0xBC, 0x9D, 0x6D, 0xC1, 0xB1, 0x0E, 0x80, 0x5D, 0xD2, 0xD5,
0xA0, 0x84, 0x07, 0x14, 0xB5, 0x90, 0x2C, 0xA3, 0xB2, 0x73, 0x4C, 0x54, 0x92, 0x74, 0x36, 0x51,
0x38, 0xB0, 0xBD, 0x5A, 0xFC, 0x60, 0x62, 0x96, 0x6C, 0x42, 0xF7, 0x10, 0x7C, 0x28, 0x27, 0x8C,
0x13, 0x95, 0x9C, 0xC7, 0x24, 0x46, 0x3B, 0x70, 0xCA, 0xE3, 0x85, 0xCB, 0x11, 0xD0, 0x93, 0xB8,
0xA6, 0x83, 0x20, 0xFF, 0x9F, 0x77, 0xC3, 0xCC, 0x03, 0x6F, 0x08, 0xBF, 0x40, 0xE7, 0x2B, 0xE2,
0x79, 0x0C, 0xAA, 0x82, 0x41, 0x3A, 0xEA, 0xB9, 0xE4, 0x9A, 0xA4, 0x97, 0x7E, 0xDA, 0x7A, 0x17,
0x66, 0x94, 0xA1, 0x1D, 0x3D, 0xF0, 0xDE, 0xB3, 0x0B, 0x72, 0xA7, 0x1C, 0xEF, 0xD1, 0x53, 0x3E,
0x8F, 0x33, 0x26, 0x5F, 0xEC, 0x76, 0x2A, 0x49, 0x81, 0x88, 0xEE, 0x21, 0xC4, 0x1A, 0xEB, 0xD9,
0xC5, 0x39, 0x99, 0xCD, 0xAD, 0x31, 0x8B, 0x01, 0x18, 0x23, 0xDD, 0x1F, 0x4E, 0x2D, 0xF9, 0x48,
0x4F, 0xF2, 0x65, 0x8E, 0x78, 0x5C, 0x58, 0x19, 0x8D, 0xE5, 0x98, 0x57, 0x67, 0x7F, 0x05, 0x64,
0xAF, 0x63, 0xB6, 0xFE, 0xF5, 0xB7, 0x3C, 0xA5, 0xCE, 0xE9, 0x68, 0x44, 0xE0, 0x4D, 0x43, 0x69,
0x29, 0x2E, 0xAC, 0x15, 0x59, 0xA8, 0x0A, 0x9E, 0x6E, 0x47, 0xDF, 0x34, 0x35, 0x6A, 0xCF, 0xDC,
0x22, 0xC9, 0xC0, 0x9B, 0x89, 0xD4, 0xED, 0xAB, 0x12, 0xA2, 0x0D, 0x52, 0xBB, 0x02, 0x2F, 0xA9,
0xD7, 0x61, 0x1E, 0xB4, 0x50, 0x04, 0xF6, 0xC2, 0x16, 0x25, 0x86, 0x56, 0x55, 0x09, 0xBE, 0x91 };
const uint8_t mult5B[] = { 0x00, 0x5B, 0xB6, 0xED, 0x05, 0x5E, 0xB3, 0xE8, 0x0A, 0x51, 0xBC, 0xE7, 0x0F, 0x54, 0xB9, 0xE2,
0x14, 0x4F, 0xA2, 0xF9, 0x11, 0x4A, 0xA7, 0xFC, 0x1E, 0x45, 0xA8, 0xF3, 0x1B, 0x40, 0xAD, 0xF6,
0x28, 0x73, 0x9E, 0xC5, 0x2D, 0x76, 0x9B, 0xC0, 0x22, 0x79, 0x94, 0xCF, 0x27, 0x7C, 0x91, 0xCA,
0x3C, 0x67, 0x8A, 0xD1, 0x39, 0x62, 0x8F, 0xD4, 0x36, 0x6D, 0x80, 0xDB, 0x33, 0x68, 0x85, 0xDE,
0x50, 0x0B, 0xE6, 0xBD, 0x55, 0x0E, 0xE3, 0xB8, 0x5A, 0x01, 0xEC, 0xB7, 0x5F, 0x04, 0xE9, 0xB2,
0x44, 0x1F, 0xF2, 0xA9, 0x41, 0x1A, 0xF7, 0xAC, 0x4E, 0x15, 0xF8, 0xA3, 0x4B, 0x10, 0xFD, 0xA6,
0x78, 0x23, 0xCE, 0x95, 0x7D, 0x26, 0xCB, 0x90, 0x72, 0x29, 0xC4, 0x9F, 0x77, 0x2C, 0xC1, 0x9A,
0x6C, 0x37, 0xDA, 0x81, 0x69, 0x32, 0xDF, 0x84, 0x66, 0x3D, 0xD0, 0x8B, 0x63, 0x38, 0xD5, 0x8E,
0xA0, 0xFB, 0x16, 0x4D, 0xA5, 0xFE, 0x13, 0x48, 0xAA, 0xF1, 0x1C, 0x47, 0xAF, 0xF4, 0x19, 0x42,
0xB4, 0xEF, 0x02, 0x59, 0xB1, 0xEA, 0x07, 0x5C, 0xBE, 0xE5, 0x08, 0x53, 0xBB, 0xE0, 0x0D, 0x56,
0x88, 0xD3, 0x3E, 0x65, 0x8D, 0xD6, 0x3B, 0x60, 0x82, 0xD9, 0x34, 0x6F, 0x87, 0xDC, 0x31, 0x6A,
0x9C, 0xC7, 0x2A, 0x71, 0x99, 0xC2, 0x2F, 0x74, 0x96, 0xCD, 0x20, 0x7B, 0x93, 0xC8, 0x25, 0x7E,
0xF0, 0xAB, 0x46, 0x1D, 0xF5, 0xAE, 0x43, 0x18, 0xFA, 0xA1, 0x4C, 0x17, 0xFF, 0xA4, 0x49, 0x12,
0xE4, 0xBF, 0x52, 0x09, 0xE1, 0xBA, 0x57, 0x0C, 0xEE, 0xB5, 0x58, 0x03, 0xEB, 0xB0, 0x5D, 0x06,
0xD8, 0x83, 0x6E, 0x35, 0xDD, 0x86, 0x6B, 0x30, 0xD2, 0x89, 0x64, 0x3F, 0xD7, 0x8C, 0x61, 0x3A,
0xCC, 0x97, 0x7A, 0x21, 0xC9, 0x92, 0x7F, 0x24, 0xC6, 0x9D, 0x70, 0x2B, 0xC3, 0x98, 0x75, 0x2E };
const uint8_t multEF[] = { 0x00, 0xEF, 0xB7, 0x58, 0x07, 0xE8, 0xB0, 0x5F, 0x0E, 0xE1, 0xB9, 0x56, 0x09, 0xE6, 0xBE, 0x51,
0x1C, 0xF3, 0xAB, 0x44, 0x1B, 0xF4, 0xAC, 0x43, 0x12, 0xFD, 0xA5, 0x4A, 0x15, 0xFA, 0xA2, 0x4D,
0x38, 0xD7, 0x8F, 0x60, 0x3F, 0xD0, 0x88, 0x67, 0x36, 0xD9, 0x81, 0x6E, 0x31, 0xDE, 0x86, 0x69,
0x24, 0xCB, 0x93, 0x7C, 0x23, 0xCC, 0x94, 0x7B, 0x2A, 0xC5, 0x9D, 0x72, 0x2D, 0xC2, 0x9A, 0x75,
0x70, 0x9F, 0xC7, 0x28, 0x77, 0x98, 0xC0, 0x2F, 0x7E, 0x91, 0xC9, 0x26, 0x79, 0x96, 0xCE, 0x21,
0x6C, 0x83, 0xDB, 0x34, 0x6B, 0x84, 0xDC, 0x33, 0x62, 0x8D, 0xD5, 0x3A, 0x65, 0x8A, 0xD2, 0x3D,
0x48, 0xA7, 0xFF, 0x10, 0x4F, 0xA0, 0xF8, 0x17, 0x46, 0xA9, 0xF1, 0x1E, 0x41, 0xAE, 0xF6, 0x19,
0x54, 0xBB, 0xE3, 0x0C, 0x53, 0xBC, 0xE4, 0x0B, 0x5A, 0xB5, 0xED, 0x02, 0x5D, 0xB2, 0xEA, 0x05,
0xE0, 0x0F, 0x57, 0xB8, 0xE7, 0x08, 0x50, 0xBF, 0xEE, 0x01, 0x59, 0xB6, 0xE9, 0x06, 0x5E, 0xB1,
0xFC, 0x13, 0x4B, 0xA4, 0xFB, 0x14, 0x4C, 0xA3, 0xF2, 0x1D, 0x45, 0xAA, 0xF5, 0x1A, 0x42, 0xAD,
0xD8, 0x37, 0x6F, 0x80, 0xDF, 0x30, 0x68, 0x87, 0xD6, 0x39, 0x61, 0x8E, 0xD1, 0x3E, 0x66, 0x89,
0xC4, 0x2B, 0x73, 0x9C, 0xC3, 0x2C, 0x74, 0x9B, 0xCA, 0x25, 0x7D, 0x92, 0xCD, 0x22, 0x7A, 0x95,
0x90, 0x7F, 0x27, 0xC8, 0x97, 0x78, 0x20, 0xCF, 0x9E, 0x71, 0x29, 0xC6, 0x99, 0x76, 0x2E, 0xC1,
0x8C, 0x63, 0x3B, 0xD4, 0x8B, 0x64, 0x3C, 0xD3, 0x82, 0x6D, 0x35, 0xDA, 0x85, 0x6A, 0x32, 0xDD,
0xA8, 0x47, 0x1F, 0xF0, 0xAF, 0x40, 0x18, 0xF7, 0xA6, 0x49, 0x11, 0xFE, 0xA1, 0x4E, 0x16, 0xF9,
0xB4, 0x5B, 0x03, 0xEC, 0xB3, 0x5C, 0x04, 0xEB, 0xBA, 0x55, 0x0D, 0xE2, 0xBD, 0x52, 0x0A, 0xE5 };
#define RS_MOD 0x14D
#define RHO 0x01010101L
#define ROL(x,n) (((x) << ((n) & 0x1F)) | ((x) >> (32-((n) & 0x1F))))
#define ROR(x,n) (((x) >> ((n) & 0x1F)) | ((x) << (32-((n) & 0x1F))))
#define _b(x, N) (((x) >> (N*8)) & 0xFF)
#define b0(x) ((uint8_t)(x))
#define b1(x) ((uint8_t)((x) >> 8))
#define b2(x) ((uint8_t)((x) >> 16))
#define b3(x) ((uint8_t)((x) >> 24))
#define U8ARRAY_TO_U32(r) ((r[0] << 24) ^ (r[1] << 16) ^ (r[2] << 8) ^ r[3])
#define U8S_TO_U32(r0, r1, r2, r3) ((r0 << 24) ^ (r1 << 16) ^ (r2 << 8) ^ r3)
/* multiply two polynomials represented as u32's, actually called with bytes */
uint32_t polyMult(uint32_t a, uint32_t b) {
uint32_t t=0;
while(a) {
if(a&1) t^=b;
b <<= 1;
a >>= 1;
}
return t;
}
/* take the polynomial t and return the t % modulus in GF(256) */
uint32_t gfMod(uint32_t t, uint32_t modulus) {
int i;
uint32_t tt;
modulus <<= 7;
for(i = 0; i < 8; i++) {
tt = t ^ modulus;
if(tt < t) t = tt;
modulus >>= 1;
}
return t;
}
/*multiply a and b and return the modulus */
#define gfMult(a, b, modulus) gfMod(polyMult(a, b), modulus)
/* return a u32 containing the result of multiplying the RS Code matrix by the sd matrix */
uint32_t RSMatrixMultiply(uint8_t sd[8]) {
int j, k;
uint8_t t;
uint8_t result[4];
for(j = 0; j < 4; j++) {
t = 0;
for(k = 0; k < 8; k++) {
t ^= gfMult(RS[j][k], sd[k], RS_MOD);
}
result[3-j] = t;
}
return U8ARRAY_TO_U32(result);
}
/* the Zero-keyed h function (used by the key setup routine) */
uint32_t h(uint32_t X, uint32_t L[4], int k) {
uint8_t y0, y1, y2, y3;
uint8_t z0, z1, z2, z3;
y0 = b0(X);
y1 = b1(X);
y2 = b2(X);
y3 = b3(X);
switch(k) {
case 4:
y0 = Q1[y0] ^ b0(L[3]);
y1 = Q0[y1] ^ b1(L[3]);
y2 = Q0[y2] ^ b2(L[3]);
y3 = Q1[y3] ^ b3(L[3]);
case 3:
y0 = Q1[y0] ^ b0(L[2]);
y1 = Q1[y1] ^ b1(L[2]);
y2 = Q0[y2] ^ b2(L[2]);
y3 = Q0[y3] ^ b3(L[2]);
case 2:
y0 = Q1[ Q0 [ Q0[y0] ^ b0(L[1]) ] ^ b0(L[0]) ];
y1 = Q0[ Q0 [ Q1[y1] ^ b1(L[1]) ] ^ b1(L[0]) ];
y2 = Q1[ Q1 [ Q0[y2] ^ b2(L[1]) ] ^ b2(L[0]) ];
y3 = Q0[ Q1 [ Q1[y3] ^ b3(L[1]) ] ^ b3(L[0]) ];
}
/* inline the MDS matrix multiply */
z0 = multEF[y0] ^ y1 ^ multEF[y2] ^ mult5B[y3];
z1 = multEF[y0] ^ mult5B[y1] ^ y2 ^ multEF[y3];
z2 = mult5B[y0] ^ multEF[y1] ^ multEF[y2] ^ y3;
z3 = y0 ^ multEF[y1] ^ mult5B[y2] ^ mult5B[y3];
return U8S_TO_U32(z0, z1, z2, z3);
}
/* given the Sbox keys, create the fully keyed QF */
void fullKey(uint32_t L[4], int k, uint32_t QF[4][256]) {
uint8_t y0, y1, y2, y3;
int i;
/* for all input values to the Q permutations */
for(i=0; i<256; i++) {
/* run the Q permutations */
y0 = i; y1=i; y2=i; y3=i;
switch(k) {
case 4:
y0 = Q1[y0] ^ b0(L[3]);
y1 = Q0[y1] ^ b1(L[3]);
y2 = Q0[y2] ^ b2(L[3]);
y3 = Q1[y3] ^ b3(L[3]);
case 3:
y0 = Q1[y0] ^ b0(L[2]);
y1 = Q1[y1] ^ b1(L[2]);
y2 = Q0[y2] ^ b2(L[2]);
y3 = Q0[y3] ^ b3(L[2]);
case 2:
y0 = Q1[ Q0 [ Q0[y0] ^ b0(L[1]) ] ^ b0(L[0]) ];
y1 = Q0[ Q0 [ Q1[y1] ^ b1(L[1]) ] ^ b1(L[0]) ];
y2 = Q1[ Q1 [ Q0[y2] ^ b2(L[1]) ] ^ b2(L[0]) ];
y3 = Q0[ Q1 [ Q1[y3] ^ b3(L[1]) ] ^ b3(L[0]) ];
}
/* now do the partial MDS matrix multiplies */
QF[0][i] = ((multEF[y0] << 24)
| (multEF[y0] << 16)
| (mult5B[y0] << 8)
| y0);
QF[1][i] = ((y1 << 24)
| (mult5B[y1] << 16)
| (multEF[y1] << 8)
| multEF[y1]);
QF[2][i] = ((multEF[y2] << 24)
| (y2 << 16)
| (multEF[y2] << 8)
| mult5B[y2]);
QF[3][i] = ((mult5B[y3] << 24)
| (multEF[y3] << 16)
| (y3 << 8)
| mult5B[y3]);
}
}
// -------------------------------------------------------------------------------------
/* fully keyed h (aka g) function */
#define fkh(X) (S[0][b0(X)]^S[1][b1(X)]^S[2][b2(X)]^S[3][b3(X)])
// -------------------------------------------------------------------------------------
/* one encryption round */
#define ENC_ROUND(R0, R1, R2, R3, round) \
T0 = fkh(R0); \
T1 = fkh(ROL(R1, 8)); \
R2 = ROR(R2 ^ (T1 + T0 + K[2*round+8]), 1); \
R3 = ROL(R3, 1) ^ (2*T1 + T0 + K[2*round+9]);
void twofish_internal_encrypt(uint32_t K[40], uint32_t S[4][256], uint8_t PT[16]) {
uint32_t R0, R1, R2, R3;
uint32_t T0, T1;
/* load/byteswap/whiten input */
R3 = K[3] ^ le32toh(((uint32_t*)PT)[3]);
R2 = K[2] ^ le32toh(((uint32_t*)PT)[2]);
R1 = K[1] ^ le32toh(((uint32_t*)PT)[1]);
R0 = K[0] ^ le32toh(((uint32_t*)PT)[0]);
ENC_ROUND(R0, R1, R2, R3, 0);
ENC_ROUND(R2, R3, R0, R1, 1);
ENC_ROUND(R0, R1, R2, R3, 2);
ENC_ROUND(R2, R3, R0, R1, 3);
ENC_ROUND(R0, R1, R2, R3, 4);
ENC_ROUND(R2, R3, R0, R1, 5);
ENC_ROUND(R0, R1, R2, R3, 6);
ENC_ROUND(R2, R3, R0, R1, 7);
ENC_ROUND(R0, R1, R2, R3, 8);
ENC_ROUND(R2, R3, R0, R1, 9);
ENC_ROUND(R0, R1, R2, R3, 10);
ENC_ROUND(R2, R3, R0, R1, 11);
ENC_ROUND(R0, R1, R2, R3, 12);
ENC_ROUND(R2, R3, R0, R1, 13);
ENC_ROUND(R0, R1, R2, R3, 14);
ENC_ROUND(R2, R3, R0, R1, 15);
/* load/byteswap/whiten output */
((uint32_t*)PT)[3] = htole32(R1 ^ K[7]);
((uint32_t*)PT)[2] = htole32(R0 ^ K[6]);
((uint32_t*)PT)[1] = htole32(R3 ^ K[5]);
((uint32_t*)PT)[0] = htole32(R2 ^ K[4]);
}
// -------------------------------------------------------------------------------------
/* one decryption round */
#define DEC_ROUND(R0, R1, R2, R3, round) \
T0 = fkh(R0); \
T1 = fkh(ROL(R1, 8)); \
R2 = ROL(R2, 1) ^ (T0 + T1 + K[2*round+8]); \
R3 = ROR(R3 ^ (T0 + 2*T1 + K[2*round+9]), 1);
void twofish_internal_decrypt(uint32_t K[40], uint32_t S[4][256], uint8_t PT[16], const uint8_t CT[16]) {
uint32_t T0, T1;
uint32_t R0, R1, R2, R3;
/* load/byteswap/whiten input */
R3 = K[7] ^ le32toh(((uint32_t*)CT)[3]);
R2 = K[6] ^ le32toh(((uint32_t*)CT)[2]);
R1 = K[5] ^ le32toh(((uint32_t*)CT)[1]);
R0 = K[4] ^ le32toh(((uint32_t*)CT)[0]);
DEC_ROUND(R0, R1, R2, R3, 15);
DEC_ROUND(R2, R3, R0, R1, 14);
DEC_ROUND(R0, R1, R2, R3, 13);
DEC_ROUND(R2, R3, R0, R1, 12);
DEC_ROUND(R0, R1, R2, R3, 11);
DEC_ROUND(R2, R3, R0, R1, 10);
DEC_ROUND(R0, R1, R2, R3, 9);
DEC_ROUND(R2, R3, R0, R1, 8);
DEC_ROUND(R0, R1, R2, R3, 7);
DEC_ROUND(R2, R3, R0, R1, 6);
DEC_ROUND(R0, R1, R2, R3, 5);
DEC_ROUND(R2, R3, R0, R1, 4);
DEC_ROUND(R0, R1, R2, R3, 3);
DEC_ROUND(R2, R3, R0, R1, 2);
DEC_ROUND(R0, R1, R2, R3, 1);
DEC_ROUND(R2, R3, R0, R1, 0);
/* load/byteswap/whiten output */
((uint32_t*)PT)[3] = htole32(R1 ^ K[3]);
((uint32_t*)PT)[2] = htole32(R0 ^ K[2]);
((uint32_t*)PT)[1] = htole32(R3 ^ K[1]);
((uint32_t*)PT)[0] = htole32(R2 ^ K[0]);
}
// -------------------------------------------------------------------------------------
/* the key schedule routine */
void keySched(const uint8_t M[], int N, uint32_t **S, uint32_t K[40], int *k) {
uint32_t Mo[4], Me[4];
int i, j;
uint8_t vector[8];
uint32_t A, B;
*k = (N + 63) / 64;
*S = (uint32_t*)malloc(sizeof(uint32_t) * (*k));
for(i = 0; i < *k; i++) {
Me[i] = le32toh(((uint32_t*)M)[2*i]);
Mo[i] = le32toh(((uint32_t*)M)[2*i+1]);
}
for(i = 0; i < *k; i++) {
for(j = 0; j < 4; j++)
vector[j] = _b(Me[i], j);
for(j = 0; j < 4; j++)
vector[j+4] = _b(Mo[i], j);
(*S)[(*k)-i-1] = RSMatrixMultiply(vector);
}
for(i = 0; i < 20; i++) {
A = h(2*i*RHO, Me, *k);
B = ROL(h(2*i*RHO + RHO, Mo, *k), 8);
K[2*i] = A+B;
K[2*i+1] = ROL(A + 2*B, 9);
}
}
// -------------------------------------------------------------------------------------
// test field
//#define fix_xor(target, source) for (int _i = 0; _i < 16; _i++) { (target)[_i] = (target)[_i] ^ (source)[_i]; }
//#define fix_xor(target, source) for (int _i = 0; _i < 16; _i+=4) { *(uint32_t*)&(target)[_i] = *(uint32_t*)&(target)[_i] ^ *(uint32_t*)&(source)[_i]; }
#define fix_xor(target, source) *(uint32_t*)&(target)[0] = *(uint32_t*)&(target)[0] ^ *(uint32_t*)&(source)[0]; *(uint32_t*)&(target)[4] = *(uint32_t*)&(target)[4] ^ *(uint32_t*)&(source)[4]; \
*(uint32_t*)&(target)[8] = *(uint32_t*)&(target)[8] ^ *(uint32_t*)&(source)[8]; *(uint32_t*)&(target)[12] = *(uint32_t*)&(target)[12] ^ *(uint32_t*)&(source)[12];
//#define fix_xor(target, source) *(uint64_t*)&(target)[0] = *(uint64_t*)&(target)[0] ^ *(uint64_t*)&(source)[0]; *(uint64_t*)&(target)[8] = *(uint64_t*)&(target)[8] ^ *(uint64_t*)&(source)[8];
//#include <immintrin.h>
//#define fix_xor(target, source) __m128i target128 = _mm_loadu_si128((__m128i*)target); __m128i source128 = _mm_loadu_si128((__m128i*)source); target128 = _mm_xor_si128(target128, source128); _mm_storeu_si128((__m128i*)(target), target128);
// -------------------------------------------------------------------------------------
/** public API **/
int tf_ecb_decrypt (unsigned char *out, const unsigned char *in, tf_context_t *ctx) {
twofish_internal_decrypt(ctx->K, ctx->QF, out, in);
return TF_BLOCK_SIZE;
}
// not used
int tf_ecb_encrypt (unsigned char *out, const unsigned char *in, tf_context_t *ctx) {
memcpy (out, in, TF_BLOCK_SIZE);
twofish_internal_encrypt(ctx->K, ctx->QF, out);
return TF_BLOCK_SIZE;
}
int tf_cbc_encrypt (unsigned char *out, const unsigned char *in, size_t in_len,
const unsigned char *iv, tf_context_t *ctx) {
uint8_t tmp[TF_BLOCK_SIZE];
size_t i;
size_t n;
memcpy(tmp, iv, TF_BLOCK_SIZE);
n = in_len / TF_BLOCK_SIZE;
for(i=0; i < n; i++) {
fix_xor(tmp, &in[i * TF_BLOCK_SIZE]);
twofish_internal_encrypt(ctx->K, ctx->QF, tmp);
memcpy(&out[i * TF_BLOCK_SIZE], tmp, TF_BLOCK_SIZE);
}
return n * TF_BLOCK_SIZE;
}
int tf_cbc_decrypt (unsigned char *out, const unsigned char *in, size_t in_len,
const unsigned char *iv, tf_context_t *ctx) {
uint8_t tmp[TF_BLOCK_SIZE];
uint8_t old[TF_BLOCK_SIZE];
size_t i;
size_t n;
memcpy(tmp, iv, TF_BLOCK_SIZE);
n = in_len / TF_BLOCK_SIZE;
for(i=0; i < n; i++) {
memcpy(old, &in[i * TF_BLOCK_SIZE], TF_BLOCK_SIZE);
twofish_internal_decrypt(ctx->K, ctx->QF, &out[i * TF_BLOCK_SIZE], &in[i * TF_BLOCK_SIZE]);
fix_xor(&out[i * TF_BLOCK_SIZE], tmp);
memcpy(tmp, old, TF_BLOCK_SIZE);
}
return n * TF_BLOCK_SIZE;
}
/**
* By definition twofish can only accept key up to 256 bit
* we wont do any checking here and will assume user already
* know about it. Twofish is undefined for key larger than 256 bit
*/
int tf_init (const unsigned char *key, size_t key_size, tf_context_t **ctx) {
int k;
uint32_t *S;
*ctx = calloc(1, sizeof(tf_context_t));
if(!(*ctx)) {
return -1;
}
(*ctx)->N = key_size;
keySched(key, key_size, &S, (*ctx)->K, &k);
fullKey(S, k, (*ctx)->QF);
free(S); // allocated in keySched(...)
return 0;
}
int tf_deinit (tf_context_t *ctx) {
if (ctx) free (ctx);
return 0;
}