a5e9f55709
In commit 9f480faec5
("crypto: chacha20 - Fix keystream alignment for
chacha20_block()"), I had missed that chacha20_block() can be called
directly on the buffer passed to get_random_bytes(), which can have any
alignment. So, while my commit didn't break anything, it didn't fully
solve the alignment problems.
Revert my solution and just update chacha20_block() to use
put_unaligned_le32(), so the output buffer need not be aligned.
This is simpler, and on many CPUs it's the same speed.
But, I kept the 'tmp' buffers in extract_crng_user() and
_get_random_bytes() 4-byte aligned, since that alignment is actually
needed for _crng_backtrack_protect() too.
Reported-by: Stephan Müller <smueller@chronox.de>
Cc: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Eric Biggers <ebiggers@google.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
75 lines
2.5 KiB
C
75 lines
2.5 KiB
C
/*
 * ChaCha20 256-bit cipher algorithm, RFC7539
 *
 * Copyright (C) 2015 Martin Willi
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 */
|
|
|
|
#include <linux/kernel.h>
|
|
#include <linux/export.h>
|
|
#include <linux/bitops.h>
|
|
#include <linux/cryptohash.h>
|
|
#include <asm/unaligned.h>
|
|
#include <crypto/chacha20.h>
|
|
|
|
/*
 * Apply one ChaCha quarter-round to the four words of @x selected by the
 * indices @a, @b, @c and @d.  Each step adds one word into another and then
 * XOR-rotates a third; the rotation amounts are 16, 12, 8 and 7 bits.
 */
static inline void chacha20_quarterround(u32 *x, unsigned int a,
					 unsigned int b, unsigned int c,
					 unsigned int d)
{
	x[a] += x[b];
	x[d] = rol32(x[d] ^ x[a], 16);

	x[c] += x[d];
	x[b] = rol32(x[b] ^ x[c], 12);

	x[a] += x[b];
	x[d] = rol32(x[d] ^ x[a], 8);

	x[c] += x[d];
	x[b] = rol32(x[b] ^ x[c], 7);
}

/*
 * chacha20_block - produce one keystream block from a ChaCha20 state
 * @state:  16-word cipher state; read as input, and word 12 (the block
 *          counter in the RFC 7539 state layout) is incremented on return
 * @stream: output buffer receiving 16 little-endian 32-bit words; written
 *          with put_unaligned_le32(), so it does not have to be aligned
 *
 * The state is copied into a working array, run through 20 rounds
 * (10 column/diagonal double rounds), and each resulting word is added to
 * the corresponding original state word before being stored to @stream.
 */
void chacha20_block(u32 *state, u8 *stream)
{
	u32 working[16];
	u8 *out = stream;
	unsigned int word;
	unsigned int round;

	for (word = 0; word < ARRAY_SIZE(working); word++)
		working[word] = state[word];

	/* Each pass is one double round: four column then four diagonal QRs. */
	for (round = 0; round < 10; round++) {
		/*
		 * The four quarter-rounds in each group touch disjoint words,
		 * so running them one after another gives the same result as
		 * interleaving their individual steps.
		 */
		chacha20_quarterround(working, 0, 4,  8, 12);
		chacha20_quarterround(working, 1, 5,  9, 13);
		chacha20_quarterround(working, 2, 6, 10, 14);
		chacha20_quarterround(working, 3, 7, 11, 15);

		chacha20_quarterround(working, 0, 5, 10, 15);
		chacha20_quarterround(working, 1, 6, 11, 12);
		chacha20_quarterround(working, 2, 7,  8, 13);
		chacha20_quarterround(working, 3, 4,  9, 14);
	}

	/* Feed-forward: add the input state, then emit little-endian words. */
	for (word = 0; word < ARRAY_SIZE(working); word++) {
		put_unaligned_le32(working[word] + state[word], out);
		out += sizeof(u32);
	}

	/* Advance the counter word so the next call yields the next block. */
	state[12]++;
}
EXPORT_SYMBOL(chacha20_block);
|