commit 3a86cc9ebd615655f25540204b9abbdab097cb34
parent 36a529a9feb4f6e5b2b7135ee66475250f78fb86
Author: vaplv <vaplv@free.fr>
Date: Tue, 6 Sep 2022 18:16:56 +0200
Optimize the sha256 implementation

Replace the generic 64-iteration compression loop with fully unrolled rounds
built from GnuPG-style macros, compute the message schedule on the fly in a
16-word circular window instead of pre-expanding all 64 words, and let
sha256_ctx_update() process whole 64-byte chunks directly from the caller's
buffer instead of copying them byte by byte into the context.
Diffstat:
| M | src/hash.c | | | 157 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++---------------------- |
1 file changed, 114 insertions(+), 43 deletions(-)
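
For reference, the equivalence that makes the on-the-fly schedule of the new
M() macro safe can be checked with a small standalone program. The sketch
below is not part of the commit and only assumes standard C and the
FIPS 180-4 sigma functions; it verifies that a 16-word circular window
produces the same 64 expanded words as the w[64] pre-expansion loop removed
by this change.

    /* schedule_check.c -- hypothetical test program, not part of hash.c */
    #include <assert.h>
    #include <stdint.h>
    #include <stdio.h>

    #define ROTR(x, n) (((x) >> (n)) | ((x) << (32 - (n))))
    #define SIG0(x) (ROTR(x, 7) ^ ROTR(x, 18) ^ ((x) >> 3))   /* small sigma0 */
    #define SIG1(x) (ROTR(x, 17) ^ ROTR(x, 19) ^ ((x) >> 10)) /* small sigma1 */

    int main(void)
    {
      uint32_t full[64]; /* reference: whole schedule kept in memory */
      uint32_t ring[16]; /* optimized: 16-word circular window */
      uint32_t i;

      for(i = 0; i < 16; ++i)
        full[i] = ring[i] = 0x01000193u * (i + 1); /* arbitrary test pattern */

      for(i = 16; i < 64; ++i) {
        /* Expansion as in the removed FOR_EACH(i, 16, 64) loop */
        full[i] = full[i-16] + SIG0(full[i-15]) + full[i-7] + SIG1(full[i-2]);
        /* Circular expansion as in the new M(I) macro */
        ring[i & 0x0f] = ring[(i-16) & 0x0f] + SIG0(ring[(i-15) & 0x0f])
                       + ring[(i-7) & 0x0f] + SIG1(ring[(i-2) & 0x0f]);
        assert(full[i] == ring[i & 0x0f]);
      }
      printf("schedules match\n");
      return 0;
    }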
diff --git a/src/hash.c b/src/hash.c
@@ -43,32 +43,21 @@ static const uint32_t k[64] = {
0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
};
-/*******************************************************************************
- * Helper functions
- ******************************************************************************/
-/* Right rotation */
-static FINLINE uint32_t
-rrot(const uint32_t ui, const unsigned int count)
-{
- ASSERT(count <= 32);
- return ui >> count | ui << (32 - count);
-}
-
+/* Most of this code comes from GnuPG's cipher/sha1.c */
static void
-sha256_process_chunk(struct sha256_ctx* ctx)
+sha256_process_chunk(struct sha256_ctx* ctx, const char chunk[64])
{
uint32_t w[64];
uint32_t a, b, c, d, e, f, g, h;
uint32_t i;
+
+ uint32_t tm;
+ uint32_t t0, t1;
+
ASSERT(ctx);
FOR_EACH(i, 0, 16) {
- w[i] = big_endian_32(((uint32_t*)ctx->chunk)[i]);
- }
- FOR_EACH(i, 16, 64) {
- const uint32_t s0 = rrot(w[i-15],7) ^ rrot(w[i-15],18) ^ (w[i-15] >> 3);
- const uint32_t s1 = rrot(w[i-2],17) ^ rrot(w[i-2], 19) ^ (w[i-2] >> 10);
- w[i] = w[i-16] + s0 + w[i-7] + s1;
+ w[i] = big_endian_32(((uint32_t*)chunk)[i]);
}
a = ctx->state[0];
@@ -80,24 +69,86 @@ sha256_process_chunk(struct sha256_ctx* ctx)
g = ctx->state[6];
h = ctx->state[7];
- /* Compress the chunk */
- FOR_EACH(i, 0, 64) {
- const uint32_t s0 = rrot(a, 2) ^ rrot(a, 13) ^ rrot(a, 22);
- const uint32_t s1 = rrot(e, 6) ^ rrot(e, 11) ^ rrot(e, 25);
- const uint32_t ch = (e & f) ^ ((~e) & g);
- const uint32_t maj = (a & b) ^ (a & c) ^ (b & c);
- const uint32_t tmp1 = h + s1 + ch + k[i] + w[i];
- const uint32_t tmp2 = s0 + maj;
-
- h = g;
- g = f;
- f = e;
- e = d + tmp1;
- d = c;
- c = b;
- b = a;
- a = tmp1 + tmp2;
- }
+ /* ROL is a rotate-left, so ROL(X, 32-n) is the rotate-right by n used in
+  * the FIPS 180-4 definitions below */
+ #define ROL(X, N) (((X) << (N)) | ((X) >> (32 - (N))))
+ #define S0(X) (ROL(X,25)^ROL(X,14)^(X>>3)) /* small sigma0 */
+ #define S1(X) (ROL(X,15)^ROL(X,13)^(X>>10)) /* small sigma1 */
+ #define SS0(X) (ROL(X,30)^ROL(X,19)^ROL(X,10)) /* big sigma0 */
+ #define SS1(X) (ROL(X,26)^ROL(X,21)^ROL(X,7)) /* big sigma1 */
+ /* Message schedule word, computed on the fly in a 16-word circular window */
+ #define M(I) (tm = S1(w[(I- 2)&0x0f]) + w[(I-7)&0x0f] \
+ + S0(w[(I-15)&0x0f]) + w[I&0x0f], w[I&0x0f] = tm)
+ #define F2(A, B, C) (( A & B ) | (C & (A | B))) /* majority */
+ #define F1(E, F, G) (G ^ (E & (F ^ G))) /* choice */
+ #define R(A, B, C, D, E, F, G, H, K, M) { \
+ t0 = SS0(A) + F2(A, B, C); \
+ t1 = H + SS1(E) + F1(E, F, G) + K + M; \
+ D += t1; \
+ H = t0 + t1; \
+ } (void)0
+
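+ /* Each R() below performs one SHA-256 round; successive calls rotate the
+  * roles of a..h in the argument list rather than moving the values, which
+  * is what makes this fully unrolled sequence equivalent to the former
+  * 64-iteration loop. */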
+ R( a, b, c, d, e, f, g, h, k[ 0], w[ 0] );
+ R( h, a, b, c, d, e, f, g, k[ 1], w[ 1] );
+ R( g, h, a, b, c, d, e, f, k[ 2], w[ 2] );
+ R( f, g, h, a, b, c, d, e, k[ 3], w[ 3] );
+ R( e, f, g, h, a, b, c, d, k[ 4], w[ 4] );
+ R( d, e, f, g, h, a, b, c, k[ 5], w[ 5] );
+ R( c, d, e, f, g, h, a, b, k[ 6], w[ 6] );
+ R( b, c, d, e, f, g, h, a, k[ 7], w[ 7] );
+ R( a, b, c, d, e, f, g, h, k[ 8], w[ 8] );
+ R( h, a, b, c, d, e, f, g, k[ 9], w[ 9] );
+ R( g, h, a, b, c, d, e, f, k[10], w[10] );
+ R( f, g, h, a, b, c, d, e, k[11], w[11] );
+ R( e, f, g, h, a, b, c, d, k[12], w[12] );
+ R( d, e, f, g, h, a, b, c, k[13], w[13] );
+ R( c, d, e, f, g, h, a, b, k[14], w[14] );
+ R( b, c, d, e, f, g, h, a, k[15], w[15] );
+ R( a, b, c, d, e, f, g, h, k[16], M(16) );
+ R( h, a, b, c, d, e, f, g, k[17], M(17) );
+ R( g, h, a, b, c, d, e, f, k[18], M(18) );
+ R( f, g, h, a, b, c, d, e, k[19], M(19) );
+ R( e, f, g, h, a, b, c, d, k[20], M(20) );
+ R( d, e, f, g, h, a, b, c, k[21], M(21) );
+ R( c, d, e, f, g, h, a, b, k[22], M(22) );
+ R( b, c, d, e, f, g, h, a, k[23], M(23) );
+ R( a, b, c, d, e, f, g, h, k[24], M(24) );
+ R( h, a, b, c, d, e, f, g, k[25], M(25) );
+ R( g, h, a, b, c, d, e, f, k[26], M(26) );
+ R( f, g, h, a, b, c, d, e, k[27], M(27) );
+ R( e, f, g, h, a, b, c, d, k[28], M(28) );
+ R( d, e, f, g, h, a, b, c, k[29], M(29) );
+ R( c, d, e, f, g, h, a, b, k[30], M(30) );
+ R( b, c, d, e, f, g, h, a, k[31], M(31) );
+ R( a, b, c, d, e, f, g, h, k[32], M(32) );
+ R( h, a, b, c, d, e, f, g, k[33], M(33) );
+ R( g, h, a, b, c, d, e, f, k[34], M(34) );
+ R( f, g, h, a, b, c, d, e, k[35], M(35) );
+ R( e, f, g, h, a, b, c, d, k[36], M(36) );
+ R( d, e, f, g, h, a, b, c, k[37], M(37) );
+ R( c, d, e, f, g, h, a, b, k[38], M(38) );
+ R( b, c, d, e, f, g, h, a, k[39], M(39) );
+ R( a, b, c, d, e, f, g, h, k[40], M(40) );
+ R( h, a, b, c, d, e, f, g, k[41], M(41) );
+ R( g, h, a, b, c, d, e, f, k[42], M(42) );
+ R( f, g, h, a, b, c, d, e, k[43], M(43) );
+ R( e, f, g, h, a, b, c, d, k[44], M(44) );
+ R( d, e, f, g, h, a, b, c, k[45], M(45) );
+ R( c, d, e, f, g, h, a, b, k[46], M(46) );
+ R( b, c, d, e, f, g, h, a, k[47], M(47) );
+ R( a, b, c, d, e, f, g, h, k[48], M(48) );
+ R( h, a, b, c, d, e, f, g, k[49], M(49) );
+ R( g, h, a, b, c, d, e, f, k[50], M(50) );
+ R( f, g, h, a, b, c, d, e, k[51], M(51) );
+ R( e, f, g, h, a, b, c, d, k[52], M(52) );
+ R( d, e, f, g, h, a, b, c, k[53], M(53) );
+ R( c, d, e, f, g, h, a, b, k[54], M(54) );
+ R( b, c, d, e, f, g, h, a, k[55], M(55) );
+ R( a, b, c, d, e, f, g, h, k[56], M(56) );
+ R( h, a, b, c, d, e, f, g, k[57], M(57) );
+ R( g, h, a, b, c, d, e, f, k[58], M(58) );
+ R( f, g, h, a, b, c, d, e, k[59], M(59) );
+ R( e, f, g, h, a, b, c, d, k[60], M(60) );
+ R( d, e, f, g, h, a, b, c, k[61], M(61) );
+ R( c, d, e, f, g, h, a, b, k[62], M(62) );
+ R( b, c, d, e, f, g, h, a, k[63], M(63) );
ctx->state[0] += a;
ctx->state[1] += b;
@@ -133,21 +184,41 @@ void
sha256_ctx_update
(struct sha256_ctx* ctx,
const char* bytes,
- const size_t len)
+ size_t len)
{
+ size_t n;
uint32_t i;
ASSERT(ctx);
ASSERT(bytes || !len);
- FOR_EACH(i, 0, len) {
- ctx->chunk[ctx->len] = bytes[i];
- ctx->len += 1;
+ if(ctx->len) {
+ n = MMIN(64 - ctx->len, len);
+ memcpy(ctx->chunk + ctx->len, bytes, n);
+ ctx->len += (uint32_t)n;
+ bytes += n;
+ len -= n;
+
if(ctx->len == 64) {
- sha256_process_chunk(ctx);
+ sha256_process_chunk(ctx, ctx->chunk);
ctx->nbits += 512;
ctx->len = 0;
}
}
+
+ if(len >= 64) {
+ n = len / 64;
+ FOR_EACH(i, 0, n) {
+ sha256_process_chunk(ctx, bytes);
+ bytes += 64;
+ }
+ ctx->nbits += n * 512;
+ len -= n * 64;
+ }
+
+ if(len) {
+ memcpy(ctx->chunk, bytes, len);
+ ctx->len = (uint32_t)len;
+ }
}
void
@@ -168,13 +239,13 @@ sha256_ctx_finalize(struct sha256_ctx* ctx, hash256_T hash)
memset(ctx->chunk+i, 0, 56-i);
} else {
memset(ctx->chunk+i, 0, 64-i);
- sha256_process_chunk(ctx);
+ sha256_process_chunk(ctx, ctx->chunk);
memset(ctx->chunk, 0, 56);
}
/* Store the message's length in bits */
*((uint64_t*)(ctx->chunk + 56)) = big_endian_64(ctx->nbits);
- sha256_process_chunk(ctx);
+ sha256_process_chunk(ctx, ctx->chunk);
/* Store the result */
((uint32_t*)hash)[0] = big_endian_32(ctx->state[0]);