Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/herbert/cryptodev-2.6.git

author: Stephen Rothwell <sfr@canb.auug.org.au> 2022-06-28 11:26:37 +1000
committer: Stephen Rothwell <sfr@canb.auug.org.au> 2022-06-28 11:26:37 +1000
commit: ad9dd1674d6bee8c27f02f6c61e5328cbc0bfb64 (patch)
tree: e05ac3b8b9c8de9c85f2711c45199a000ea0c26f
parent: 09239bdc1e3c1626871ffc34250be5332c3c0ef8 (diff)
parent: 9c846c5d2d4e63d75b2cb172625087cadadbe065 (diff)
download: linux-next-ad9dd1674d6bee8c27f02f6c61e5328cbc0bfb64.tar.gz
57 files changed, 4770 insertions, 982 deletions
diff --git a/Documentation/filesystems/fscrypt.rst b/Documentation/filesystems/fscrypt.rst
index 2e9aaa295125a..5ba5817c17c2a 100644
--- a/Documentation/filesystems/fscrypt.rst
+++ b/Documentation/filesystems/fscrypt.rst
@@ -337,6 +337,7 @@ Currently, the following pairs of encryption modes are supported:
 - AES-256-XTS for contents and AES-256-CTS-CBC for filenames
 - AES-128-CBC for contents and AES-128-CTS-CBC for filenames
 - Adiantum for both contents and filenames
+- AES-256-XTS for contents and AES-256-HCTR2 for filenames (v2 policies only)
 
 If unsure, you should use the (AES-256-XTS, AES-256-CTS-CBC) pair.
 
@@ -357,6 +358,17 @@ To use Adiantum, CONFIG_CRYPTO_ADIANTUM must be enabled.  Also, fast
 implementations of ChaCha and NHPoly1305 should be enabled, e.g.
 CONFIG_CRYPTO_CHACHA20_NEON and CONFIG_CRYPTO_NHPOLY1305_NEON for ARM.
 
+AES-256-HCTR2 is another true wide-block encryption mode that is intended for
+use on CPUs with dedicated crypto instructions.  AES-256-HCTR2 has the property
+that a bitflip in the plaintext changes the entire ciphertext.  This property
+makes it desirable for filename encryption since initialization vectors are
+reused within a directory.  For more details on AES-256-HCTR2, see the paper
+"Length-preserving encryption with HCTR2"
+(https://eprint.iacr.org/2021/1441.pdf).  To use AES-256-HCTR2,
+CONFIG_CRYPTO_HCTR2 must be enabled.  Also, fast implementations of XCTR and
+POLYVAL should be enabled, e.g. CONFIG_CRYPTO_POLYVAL_ARM64_CE and
+CONFIG_CRYPTO_AES_ARM64_CE_BLK for ARM64.
+
 New encryption modes can be added relatively easily, without changes
 to individual filesystems.  However, authenticated encryption (AE)
 modes are not currently supported because of the difficulty of dealing
@@ -404,11 +416,11 @@ alternatively has the file's nonce (for `DIRECT_KEY policies`_) or
 inode number (for `IV_INO_LBLK_64 policies`_) included in the IVs.
 Thus, IV reuse is limited to within a single directory.
 
-With CTS-CBC, the IV reuse means that when the plaintext filenames
-share a common prefix at least as long as the cipher block size (16
-bytes for AES), the corresponding encrypted filenames will also share
-a common prefix.  This is undesirable.  Adiantum does not have this
-weakness, as it is a wide-block encryption mode.
+With CTS-CBC, the IV reuse means that when the plaintext filenames share a
+common prefix at least as long as the cipher block size (16 bytes for AES), the
+corresponding encrypted filenames will also share a common prefix.  This is
+undesirable.  Adiantum and HCTR2 do not have this weakness, as they are
+wide-block encryption modes.
 
 All supported filenames encryption modes accept any plaintext length
 >= 16 bytes; cipher block alignment is not required.  However,
diff --git a/MAINTAINERS b/MAINTAINERS
index e9e9e99a22960..f3262a1f52d56 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -8978,16 +8978,25 @@ F:	Documentation/admin-guide/perf/hisi-pcie-pmu.rst
 F:	Documentation/admin-guide/perf/hisi-pmu.rst
 F:	drivers/perf/hisilicon
 
-HISILICON QM AND ZIP Controller DRIVER
+HISILICON QM DRIVER
+M:	Weili Qian <qianweili@huawei.com>
 M:	Zhou Wang <wangzhou1@hisilicon.com>
 L:	linux-crypto@vger.kernel.org
 S:	Maintained
-F:	Documentation/ABI/testing/debugfs-hisi-zip
+F:	drivers/crypto/hisilicon/Kconfig
+F:	drivers/crypto/hisilicon/Makefile
 F:	drivers/crypto/hisilicon/qm.c
 F:	drivers/crypto/hisilicon/sgl.c
-F:	drivers/crypto/hisilicon/zip/
 F:	include/linux/hisi_acc_qm.h
 
+HISILICON ZIP Controller DRIVER
+M:	Yang Shen <shenyang39@huawei.com>
+M:	Zhou Wang <wangzhou1@hisilicon.com>
+L:	linux-crypto@vger.kernel.org
+S:	Maintained
+F:	Documentation/ABI/testing/debugfs-hisi-zip
+F:	drivers/crypto/hisilicon/zip/
+
 HISILICON ROCE DRIVER
 M:	Wenpeng Liang <liangwenpeng@huawei.com>
 M:	Weihang Li <liweihang@huawei.com>
diff --git a/arch/arm/crypto/Kconfig b/arch/arm/crypto/Kconfig
index e4dba5461cb3e..149a5bd6b88c1 100644
--- a/arch/arm/crypto/Kconfig
+++ b/arch/arm/crypto/Kconfig
@@ -63,7 +63,7 @@ config CRYPTO_SHA512_ARM
 	  using optimized ARM assembler and NEON, when available.
 
 config CRYPTO_BLAKE2S_ARM
-	tristate "BLAKE2s digest algorithm (ARM)"
+	bool "BLAKE2s digest algorithm (ARM)"
 	select CRYPTO_ARCH_HAVE_LIB_BLAKE2S
 	help
 	  BLAKE2s digest algorithm optimized with ARM scalar instructions.  This
diff --git a/arch/arm/crypto/Makefile b/arch/arm/crypto/Makefile
index 0274f81cc8ea0..971e74546fb1b 100644
--- a/arch/arm/crypto/Makefile
+++ b/arch/arm/crypto/Makefile
@@ -9,8 +9,7 @@ obj-$(CONFIG_CRYPTO_SHA1_ARM) += sha1-arm.o
 obj-$(CONFIG_CRYPTO_SHA1_ARM_NEON) += sha1-arm-neon.o
 obj-$(CONFIG_CRYPTO_SHA256_ARM) += sha256-arm.o
 obj-$(CONFIG_CRYPTO_SHA512_ARM) += sha512-arm.o
-obj-$(CONFIG_CRYPTO_BLAKE2S_ARM) += blake2s-arm.o
-obj-$(if $(CONFIG_CRYPTO_BLAKE2S_ARM),y) += libblake2s-arm.o
+obj-$(CONFIG_CRYPTO_BLAKE2S_ARM) += libblake2s-arm.o
 obj-$(CONFIG_CRYPTO_BLAKE2B_NEON) += blake2b-neon.o
 obj-$(CONFIG_CRYPTO_CHACHA20_NEON) += chacha-neon.o
 obj-$(CONFIG_CRYPTO_POLY1305_ARM) += poly1305-arm.o
@@ -32,7 +31,6 @@ sha256-arm-neon-$(CONFIG_KERNEL_MODE_NEON) := sha256_neon_glue.o
 sha256-arm-y	:= sha256-core.o sha256_glue.o $(sha256-arm-neon-y)
 sha512-arm-neon-$(CONFIG_KERNEL_MODE_NEON) := sha512-neon-glue.o
 sha512-arm-y	:= sha512-core.o sha512-glue.o $(sha512-arm-neon-y)
-blake2s-arm-y   := blake2s-shash.o
 libblake2s-arm-y:= blake2s-core.o blake2s-glue.o
 blake2b-neon-y  := blake2b-neon-core.o blake2b-neon-glue.o
 sha1-arm-ce-y	:= sha1-ce-core.o sha1-ce-glue.o
diff --git a/arch/arm/crypto/blake2s-shash.c b/arch/arm/crypto/blake2s-shash.c
deleted file mode 100644
index 763c73beea2d0..0000000000000
--- a/arch/arm/crypto/blake2s-shash.c
+++ /dev/null
@@ -1,75 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * BLAKE2s digest algorithm, ARM scalar implementation
- *
- * Copyright 2020 Google LLC
- */
-
-#include <crypto/internal/blake2s.h>
-#include <crypto/internal/hash.h>
-
-#include <linux/module.h>
-
-static int crypto_blake2s_update_arm(struct shash_desc *desc,
-				     const u8 *in, unsigned int inlen)
-{
-	return crypto_blake2s_update(desc, in, inlen, false);
-}
-
-static int crypto_blake2s_final_arm(struct shash_desc *desc, u8 *out)
-{
-	return crypto_blake2s_final(desc, out, false);
-}
-
-#define BLAKE2S_ALG(name, driver_name, digest_size)			\
-	{								\
-		.base.cra_name		= name,				\
-		.base.cra_driver_name	= driver_name,			\
-		.base.cra_priority	= 200,				\
-		.base.cra_flags		= CRYPTO_ALG_OPTIONAL_KEY,	\
-		.base.cra_blocksize	= BLAKE2S_BLOCK_SIZE,		\
-		.base.cra_ctxsize	= sizeof(struct blake2s_tfm_ctx), \
-		.base.cra_module	= THIS_MODULE,			\
-		.digestsize		= digest_size,			\
-		.setkey			= crypto_blake2s_setkey,	\
-		.init			= crypto_blake2s_init,		\
-		.update			= crypto_blake2s_update_arm,	\
-		.final			= crypto_blake2s_final_arm,	\
-		.descsize		= sizeof(struct blake2s_state),	\
-	}
-
-static struct shash_alg blake2s_arm_algs[] = {
-	BLAKE2S_ALG("blake2s-128", "blake2s-128-arm", BLAKE2S_128_HASH_SIZE),
-	BLAKE2S_ALG("blake2s-160", "blake2s-160-arm", BLAKE2S_160_HASH_SIZE),
-	BLAKE2S_ALG("blake2s-224", "blake2s-224-arm", BLAKE2S_224_HASH_SIZE),
-	BLAKE2S_ALG("blake2s-256", "blake2s-256-arm", BLAKE2S_256_HASH_SIZE),
-};
-
-static int __init blake2s_arm_mod_init(void)
-{
-	return IS_REACHABLE(CONFIG_CRYPTO_HASH) ?
-		crypto_register_shashes(blake2s_arm_algs,
-					ARRAY_SIZE(blake2s_arm_algs)) : 0;
-}
-
-static void __exit blake2s_arm_mod_exit(void)
-{
-	if (IS_REACHABLE(CONFIG_CRYPTO_HASH))
-		crypto_unregister_shashes(blake2s_arm_algs,
-					  ARRAY_SIZE(blake2s_arm_algs));
-}
-
-module_init(blake2s_arm_mod_init);
-module_exit(blake2s_arm_mod_exit);
-
-MODULE_DESCRIPTION("BLAKE2s digest algorithm, ARM scalar implementation");
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Eric Biggers <ebiggers@google.com>");
-MODULE_ALIAS_CRYPTO("blake2s-128");
-MODULE_ALIAS_CRYPTO("blake2s-128-arm");
-MODULE_ALIAS_CRYPTO("blake2s-160");
-MODULE_ALIAS_CRYPTO("blake2s-160-arm");
-MODULE_ALIAS_CRYPTO("blake2s-224");
-MODULE_ALIAS_CRYPTO("blake2s-224-arm");
-MODULE_ALIAS_CRYPTO("blake2s-256");
-MODULE_ALIAS_CRYPTO("blake2s-256-arm");
diff --git a/arch/arm64/crypto/Kconfig b/arch/arm64/crypto/Kconfig
index ac85682c013c1..4391a463abd77 100644
--- a/arch/arm64/crypto/Kconfig
+++ b/arch/arm64/crypto/Kconfig
@@ -72,6 +72,11 @@ config CRYPTO_GHASH_ARM64_CE
 	select CRYPTO_GF128MUL
 	select CRYPTO_LIB_AES
 
+config CRYPTO_POLYVAL_ARM64_CE
+	tristate "POLYVAL using ARMv8 Crypto Extensions (for HCTR2)"
+	depends on KERNEL_MODE_NEON
+	select CRYPTO_POLYVAL
+
 config CRYPTO_CRCT10DIF_ARM64_CE
 	tristate "CRCT10DIF digest algorithm using PMULL instructions"
 	depends on KERNEL_MODE_NEON && CRC_T10DIF
@@ -96,13 +101,13 @@ config CRYPTO_AES_ARM64_CE_CCM
 	select CRYPTO_LIB_AES
 
 config CRYPTO_AES_ARM64_CE_BLK
-	tristate "AES in ECB/CBC/CTR/XTS modes using ARMv8 Crypto Extensions"
+	tristate "AES in ECB/CBC/CTR/XTS/XCTR modes using ARMv8 Crypto Extensions"
 	depends on KERNEL_MODE_NEON
 	select CRYPTO_SKCIPHER
 	select CRYPTO_AES_ARM64_CE
 
 config CRYPTO_AES_ARM64_NEON_BLK
-	tristate "AES in ECB/CBC/CTR/XTS modes using NEON instructions"
+	tristate "AES in ECB/CBC/CTR/XTS/XCTR modes using NEON instructions"
 	depends on KERNEL_MODE_NEON
 	select CRYPTO_SKCIPHER
 	select CRYPTO_LIB_AES
diff --git a/arch/arm64/crypto/Makefile b/arch/arm64/crypto/Makefile
index bea8995133b1f..24bb0c4610de2 100644
--- a/arch/arm64/crypto/Makefile
+++ b/arch/arm64/crypto/Makefile
@@ -32,6 +32,9 @@ sm4-neon-y := sm4-neon-glue.o sm4-neon-core.o
 obj-$(CONFIG_CRYPTO_GHASH_ARM64_CE) += ghash-ce.o
 ghash-ce-y := ghash-ce-glue.o ghash-ce-core.o
 
+obj-$(CONFIG_CRYPTO_POLYVAL_ARM64_CE) += polyval-ce.o
+polyval-ce-y := polyval-ce-glue.o polyval-ce-core.o
+
 obj-$(CONFIG_CRYPTO_CRCT10DIF_ARM64_CE) += crct10dif-ce.o
 crct10dif-ce-y := crct10dif-ce-core.o crct10dif-ce-glue.o
 
diff --git a/arch/arm64/crypto/aes-glue.c b/arch/arm64/crypto/aes-glue.c
index 561dd23325711..162787c7aa865 100644
--- a/arch/arm64/crypto/aes-glue.c
+++ b/arch/arm64/crypto/aes-glue.c
@@ -34,10 +34,11 @@
 #define aes_essiv_cbc_encrypt	ce_aes_essiv_cbc_encrypt
 #define aes_essiv_cbc_decrypt	ce_aes_essiv_cbc_decrypt
 #define aes_ctr_encrypt		ce_aes_ctr_encrypt
+#define aes_xctr_encrypt	ce_aes_xctr_encrypt
 #define aes_xts_encrypt		ce_aes_xts_encrypt
 #define aes_xts_decrypt		ce_aes_xts_decrypt
 #define aes_mac_update		ce_aes_mac_update
-MODULE_DESCRIPTION("AES-ECB/CBC/CTR/XTS using ARMv8 Crypto Extensions");
+MODULE_DESCRIPTION("AES-ECB/CBC/CTR/XTS/XCTR using ARMv8 Crypto Extensions");
 #else
 #define MODE			"neon"
 #define PRIO			200
@@ -50,16 +51,18 @@ MODULE_DESCRIPTION("AES-ECB/CBC/CTR/XTS using ARMv8 Crypto Extensions");
 #define aes_essiv_cbc_encrypt	neon_aes_essiv_cbc_encrypt
 #define aes_essiv_cbc_decrypt	neon_aes_essiv_cbc_decrypt
 #define aes_ctr_encrypt		neon_aes_ctr_encrypt
+#define aes_xctr_encrypt	neon_aes_xctr_encrypt
 #define aes_xts_encrypt		neon_aes_xts_encrypt
 #define aes_xts_decrypt		neon_aes_xts_decrypt
 #define aes_mac_update		neon_aes_mac_update
-MODULE_DESCRIPTION("AES-ECB/CBC/CTR/XTS using ARMv8 NEON");
+MODULE_DESCRIPTION("AES-ECB/CBC/CTR/XTS/XCTR using ARMv8 NEON");
 #endif
 #if defined(USE_V8_CRYPTO_EXTENSIONS) || !IS_ENABLED(CONFIG_CRYPTO_AES_ARM64_BS)
 MODULE_ALIAS_CRYPTO("ecb(aes)");
 MODULE_ALIAS_CRYPTO("cbc(aes)");
 MODULE_ALIAS_CRYPTO("ctr(aes)");
 MODULE_ALIAS_CRYPTO("xts(aes)");
+MODULE_ALIAS_CRYPTO("xctr(aes)");
 #endif
 MODULE_ALIAS_CRYPTO("cts(cbc(aes))");
 MODULE_ALIAS_CRYPTO("essiv(cbc(aes),sha256)");
@@ -89,6 +92,9 @@ asmlinkage void aes_cbc_cts_decrypt(u8 out[], u8 const in[], u32 const rk[],
 asmlinkage void aes_ctr_encrypt(u8 out[], u8 const in[], u32 const rk[],
 				int rounds, int bytes, u8 ctr[]);
 
+asmlinkage void aes_xctr_encrypt(u8 out[], u8 const in[], u32 const rk[],
+				 int rounds, int bytes, u8 ctr[], int byte_ctr);
+
 asmlinkage void aes_xts_encrypt(u8 out[], u8 const in[], u32 const rk1[],
 				int rounds, int bytes, u32 const rk2[], u8 iv[],
 				int first);
@@ -442,6 +448,52 @@ static int __maybe_unused essiv_cbc_decrypt(struct skcipher_request *req)
 	return err ?: cbc_decrypt_walk(req, &walk);
 }
 
+static int __maybe_unused xctr_encrypt(struct skcipher_request *req)
+{
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct crypto_aes_ctx *ctx = crypto_skcipher_ctx(tfm);
+	int err, rounds = 6 + ctx->key_length / 4;
+	struct skcipher_walk walk;
+	unsigned int byte_ctr = 0;
+
+	err = skcipher_walk_virt(&walk, req, false);
+
+	while (walk.nbytes > 0) {
+		const u8 *src = walk.src.virt.addr;
+		unsigned int nbytes = walk.nbytes;
+		u8 *dst = walk.dst.virt.addr;
+		u8 buf[AES_BLOCK_SIZE];
+
+		/*
+		 * If given less than 16 bytes, we must copy the partial block
+		 * into a temporary buffer of 16 bytes to avoid out of bounds
+		 * reads and writes.  Furthermore, this code is somewhat unusual
+		 * in that it expects the end of the data to be at the end of
+		 * the temporary buffer, rather than the start of the data at
+		 * the start of the temporary buffer.
+		 */
+		if (unlikely(nbytes < AES_BLOCK_SIZE))
+			src = dst = memcpy(buf + sizeof(buf) - nbytes,
+					   src, nbytes);
+		else if (nbytes < walk.total)
+			nbytes &= ~(AES_BLOCK_SIZE - 1);
+
+		kernel_neon_begin();
+		aes_xctr_encrypt(dst, src, ctx->key_enc, rounds, nbytes,
+						 walk.iv, byte_ctr);
+		kernel_neon_end();
+
+		if (unlikely(nbytes < AES_BLOCK_SIZE))
+			memcpy(walk.dst.virt.addr,
+			       buf + sizeof(buf) - nbytes, nbytes);
+		byte_ctr += nbytes;
+
+		err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
+	}
+
+	return err;
+}
+
 static int __maybe_unused ctr_encrypt(struct skcipher_request *req)
 {
 	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
@@ -457,6 +509,14 @@ static int __maybe_unused ctr_encrypt(struct skcipher_request *req)
 		u8 *dst = walk.dst.virt.addr;
 		u8 buf[AES_BLOCK_SIZE];
 
+		/*
+		 * If given less than 16 bytes, we must copy the partial block
+		 * into a temporary buffer of 16 bytes to avoid out of bounds
+		 * reads and writes.  Furthermore, this code is somewhat unusual
+		 * in that it expects the end of the data to be at the end of
+		 * the temporary buffer, rather than the start of the data at
+		 * the start of the temporary buffer.
+		 */
 		if (unlikely(nbytes < AES_BLOCK_SIZE))
 			src = dst = memcpy(buf + sizeof(buf) - nbytes,
 					   src, nbytes);
@@ -671,6 +731,22 @@ static struct skcipher_alg aes_algs[] = { {
 	.decrypt	= ctr_encrypt,
 }, {
 	.base = {
+		.cra_name		= "xctr(aes)",
+		.cra_driver_name	= "xctr-aes-" MODE,
+		.cra_priority		= PRIO,
+		.cra_blocksize		= 1,
+		.cra_ctxsize		= sizeof(struct crypto_aes_ctx),
+		.cra_module		= THIS_MODULE,
+	},
+	.min_keysize	= AES_MIN_KEY_SIZE,
+	.max_keysize	= AES_MAX_KEY_SIZE,
+	.ivsize		= AES_BLOCK_SIZE,
+	.chunksize	= AES_BLOCK_SIZE,
+	.setkey		= skcipher_aes_setkey,
+	.encrypt	= xctr_encrypt,
+	.decrypt	= xctr_encrypt,
+}, {
+	.base = {
 		.cra_name		= "xts(aes)",
 		.cra_driver_name	= "xts-aes-" MODE,
 		.cra_priority		= PRIO,
diff --git a/arch/arm64/crypto/aes-modes.S b/arch/arm64/crypto/aes-modes.S
index dc35eb0245c55..5abc834271f4a 100644
--- a/arch/arm64/crypto/aes-modes.S
+++ b/arch/arm64/crypto/aes-modes.S
@@ -318,127 +318,211 @@ AES_FUNC_END(aes_cbc_cts_decrypt)
 	.byte		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
 	.previous
 
-
 	/*
-	 * aes_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
-	 *		   int bytes, u8 ctr[])
+	 * This macro generates the code for CTR and XCTR mode.
 	 */
+.macro ctr_encrypt xctr
+	// Arguments
+	OUT		.req x0
+	IN		.req x1
+	KEY		.req x2
+	ROUNDS_W	.req w3
+	BYTES_W		.req w4
+	IV		.req x5
+	BYTE_CTR_W 	.req w6		// XCTR only
+	// Intermediate values
+	CTR_W		.req w11	// XCTR only
+	CTR		.req x11	// XCTR only
+	IV_PART		.req x12
+	BLOCKS		.req x13
+	BLOCKS_W	.req w13
 
-AES_FUNC_START(aes_ctr_encrypt)
 	stp		x29, x30, [sp, #-16]!
 	mov		x29, sp
 
-	enc_prepare	w3, x2, x12
-	ld1		{vctr.16b}, [x5]
+	enc_prepare	ROUNDS_W, KEY, IV_PART
+	ld1		{vctr.16b}, [IV]
 
-	umov		x12, vctr.d[1]		/* keep swabbed ctr in reg */
-	rev		x12, x12
+	/*
+	 * Keep 64 bits of the IV in a register.  For CTR mode this lets us
+	 * easily increment the IV.  For XCTR mode this lets us efficiently XOR
+	 * the 64-bit counter with the IV.
+	 */
+	.if \xctr
+		umov		IV_PART, vctr.d[0]
+		lsr		CTR_W, BYTE_CTR_W, #4
+	.else
+		umov		IV_PART, vctr.d[1]
+		rev		IV_PART, IV_PART
+	.endif
 
-.LctrloopNx:
-	add		w7, w4, #15
-	sub		w4, w4, #MAX_STRIDE << 4
-	lsr		w7, w7, #4
+.LctrloopNx\xctr:
+	add		BLOCKS_W, BYTES_W, #15
+	sub		BYTES_W, BYTES_W, #MAX_STRIDE << 4
+	lsr		BLOCKS_W, BLOCKS_W, #4
 	mov		w8, #MAX_STRIDE
-	cmp		w7, w8
-	csel		w7, w7, w8, lt
-	adds		x12, x12, x7
+	cmp		BLOCKS_W, w8
+	csel		BLOCKS_W, BLOCKS_W, w8, lt
 
+	/*
+	 * Set up the counter values in v0-v{MAX_STRIDE-1}.
+	 *
+	 * If we are encrypting less than MAX_STRIDE blocks, the tail block
+	 * handling code expects the last keystream block to be in
+	 * v{MAX_STRIDE-1}.  For example: if encrypting two blocks with
+	 * MAX_STRIDE=5, then v3 and v4 should have the next two counter blocks.
+	 */
+	.if \xctr
+		add		CTR, CTR, BLOCKS
+	.else
+		adds		IV_PART, IV_PART, BLOCKS
+	.endif
 	mov		v0.16b, vctr.16b
 	mov		v1.16b, vctr.16b
 	mov		v2.16b, vctr.16b
 	mov		v3.16b, vctr.16b
 ST5(	mov		v4.16b, vctr.16b		)
-	bcs		0f
+	.if \xctr
+		sub		x6, CTR, #MAX_STRIDE - 1
+		sub		x7, CTR, #MAX_STRIDE - 2
+		sub		x8, CTR, #MAX_STRIDE - 3
+		sub		x9, CTR, #MAX_STRIDE - 4
+ST5(		sub		x10, CTR, #MAX_STRIDE - 5	)
+		eor		x6, x6, IV_PART
+		eor		x7, x7, IV_PART
+		eor		x8, x8, IV_PART
+		eor		x9, x9, IV_PART
+ST5(		eor		x10, x10, IV_PART		)
+		mov		v0.d[0], x6
+		mov		v1.d[0], x7
+		mov		v2.d[0], x8
+		mov		v3.d[0], x9
+ST5(		mov		v4.d[0], x10			)
+	.else
+		bcs		0f
+		.subsection	1
+		/*
+		 * This subsection handles carries.
+		 *
+		 * Conditional branching here is allowed with respect to time
+		 * invariance since the branches are dependent on the IV instead
+		 * of the plaintext or key.  This code is rarely executed in
+		 * practice anyway.
+		 */
 
-	.subsection	1
-	/* apply carry to outgoing counter */
-0:	umov		x8, vctr.d[0]
-	rev		x8, x8
-	add		x8, x8, #1
-	rev		x8, x8
-	ins		vctr.d[0], x8
+		/* Apply carry to outgoing counter. */
+0:		umov		x8, vctr.d[0]
+		rev		x8, x8
+		add		x8, x8, #1
+		rev		x8, x8
+		ins		vctr.d[0], x8
 
-	/* apply carry to N counter blocks for N := x12 */
-	cbz		x12, 2f
-	adr		x16, 1f
-	sub		x16, x16, x12, lsl #3
-	br		x16
-	bti		c
-	mov		v0.d[0], vctr.d[0]
-	bti		c
-	mov		v1.d[0], vctr.d[0]
-	bti		c
-	mov		v2.d[0], vctr.d[0]
-	bti		c
-	mov		v3.d[0], vctr.d[0]
-ST5(	bti		c				)
-ST5(	mov		v4.d[0], vctr.d[0]		)
-1:	b		2f
-	.previous
+		/*
+		 * Apply carry to counter blocks if needed.
+		 *
+		 * Since the carry flag was set, we know 0 <= IV_PART <
+		 * MAX_STRIDE.  Using the value of IV_PART we can determine how
+		 * many counter blocks need to be updated.
+		 */
+		cbz		IV_PART, 2f
+		adr		x16, 1f
+		sub		x16, x16, IV_PART, lsl #3
+		br		x16
+		bti		c
+		mov		v0.d[0], vctr.d[0]
+		bti		c
+		mov		v1.d[0], vctr.d[0]
+		bti		c
+		mov		v2.d[0], vctr.d[0]
+		bti		c
+		mov		v3.d[0], vctr.d[0]
+ST5(		bti		c				)
+ST5(		mov		v4.d[0], vctr.d[0]		)
+1:		b		2f
+		.previous
+
+2:		rev		x7, IV_PART
+		ins		vctr.d[1], x7
+		sub		x7, IV_PART, #MAX_STRIDE - 1
+		sub		x8, IV_PART, #MAX_STRIDE - 2
+		sub		x9, IV_PART, #MAX_STRIDE - 3
+		rev		x7, x7
+		rev		x8, x8
+		mov		v1.d[1], x7
+		rev		x9, x9
+ST5(		sub		x10, IV_PART, #MAX_STRIDE - 4	)
+		mov		v2.d[1], x8
+ST5(		rev		x10, x10			)
+		mov		v3.d[1], x9
+ST5(		mov		v4.d[1], x10			)
+	.endif
 
-2:	rev		x7, x12
-	ins		vctr.d[1], x7
-	sub		x7, x12, #MAX_STRIDE - 1
-	sub		x8, x12, #MAX_STRIDE - 2
-	sub		x9, x12, #MAX_STRIDE - 3
-	rev		x7, x7
-	rev		x8, x8
-	mov		v1.d[1], x7
-	rev		x9, x9
-ST5(	sub		x10, x12, #MAX_STRIDE - 4	)
-	mov		v2.d[1], x8
-ST5(	rev		x10, x10			)
-	mov		v3.d[1], x9
-ST5(	mov		v4.d[1], x10			)
-	tbnz		w4, #31, .Lctrtail
-	ld1		{v5.16b-v7.16b}, [x1], #48
+	/*
+	 * If there are at least MAX_STRIDE blocks left, XOR the data with
+	 * keystream and store.  Otherwise jump to tail handling.
+	 */
+	tbnz		BYTES_W, #31, .Lctrtail\xctr
+	ld1		{v5.16b-v7.16b}, [IN], #48
 ST4(	bl		aes_encrypt_block4x		)
 ST5(	bl		aes_encrypt_block5x		)
 	eor		v0.16b, v5.16b, v0.16b
-ST4(	ld1		{v5.16b}, [x1], #16		)
+ST4(	ld1		{v5.16b}, [IN], #16		)
 	eor		v1.16b, v6.16b, v1.16b
-ST5(	ld1		{v5.16b-v6.16b}, [x1], #32	)
+ST5(	ld1		{v5.16b-v6.16b}, [IN], #32	)
 	eor		v2.16b, v7.16b, v2.16b
 	eor		v3.16b, v5.16b, v3.16b
 ST5(	eor		v4.16b, v6.16b, v4.16b		)
-	st1		{v0.16b-v3.16b}, [x0], #64
-ST5(	st1		{v4.16b}, [x0], #16		)
-	cbz		w4, .Lctrout
-	b		.LctrloopNx
+	st1		{v0.16b-v3.16b}, [OUT], #64
+ST5(	st1		{v4.16b}, [OUT], #16		)
+	cbz		BYTES_W, .Lctrout\xctr
+	b		.LctrloopNx\xctr
 
-.Lctrout:
-	st1		{vctr.16b}, [x5]	/* return next CTR value */
+.Lctrout\xctr:
+	.if !\xctr
+		st1		{vctr.16b}, [IV] /* return next CTR value */
+	.endif
 	ldp		x29, x30, [sp], #16
 	ret
 
-.Lctrtail:
-	/* XOR up to MAX_STRIDE * 16 - 1 bytes of in/output with v0 ... v3/v4 */
+.Lctrtail\xctr:
+	/*
+	 * Handle up to MAX_STRIDE * 16 - 1 bytes of plaintext
+	 *
+	 * This code expects the last keystream block to be in v{MAX_STRIDE-1}.
+	 * For example: if encrypting two blocks with MAX_STRIDE=5, then v3 and
+	 * v4 should have the next two counter blocks.
+	 *
+	 * This allows us to store the ciphertext by writing to overlapping
+	 * regions of memory.  Any invalid ciphertext blocks get overwritten by
+	 * correctly computed blocks.  This approach greatly simplifies the
+	 * logic for storing the ciphertext.
+	 */
 	mov		x16, #16
-	ands		x6, x4, #0xf
-	csel		x13, x6, x16, ne
+	ands		w7, BYTES_W, #0xf
+	csel		x13, x7, x16, ne
 
-ST5(	cmp		w4, #64 - (MAX_STRIDE << 4)	)
+ST5(	cmp		BYTES_W, #64 - (MAX_STRIDE << 4))
 ST5(	csel		x14, x16, xzr, gt		)
-	cmp		w4, #48 - (MAX_STRIDE << 4)
+	cmp		BYTES_W, #48 - (MAX_STRIDE << 4)
 	csel		x15, x16, xzr, gt
-	cmp		w4, #32 - (MAX_STRIDE << 4)
+	cmp		BYTES_W, #32 - (MAX_STRIDE << 4)
 	csel		x16, x16, xzr, gt
-	cmp		w4, #16 - (MAX_STRIDE << 4)
+	cmp		BYTES_W, #16 - (MAX_STRIDE << 4)
 
-	adr_l		x12, .Lcts_permute_table
-	add		x12, x12, x13
-	ble		.Lctrtail1x
+	adr_l		x9, .Lcts_permute_table
+	add		x9, x9, x13
+	ble		.Lctrtail1x\xctr
 
-ST5(	ld1		{v5.16b}, [x1], x14		)
-	ld1		{v6.16b}, [x1], x15
-	ld1		{v7.16b}, [x1], x16
+ST5(	ld1		{v5.16b}, [IN], x14		)
+	ld1		{v6.16b}, [IN], x15
+	ld1		{v7.16b}, [IN], x16
 
 ST4(	bl		aes_encrypt_block4x		)
 ST5(	bl		aes_encrypt_block5x		)
 
-	ld1		{v8.16b}, [x1], x13
-	ld1		{v9.16b}, [x1]
-	ld1		{v10.16b}, [x12]
+	ld1		{v8.16b}, [IN], x13
+	ld1		{v9.16b}, [IN]
+	ld1		{v10.16b}, [x9]
 
 ST4(	eor		v6.16b, v6.16b, v0.16b		)
 ST4(	eor		v7.16b, v7.16b, v1.16b		)
@@ -453,32 +537,91 @@ ST5(	eor		v7.16b, v7.16b, v2.16b		)
 ST5(	eor		v8.16b, v8.16b, v3.16b		)
 ST5(	eor		v9.16b, v9.16b, v4.16b		)
 
-ST5(	st1		{v5.16b}, [x0], x14		)
-	st1		{v6.16b}, [x0], x15
-	st1		{v7.16b}, [x0], x16
-	add		x13, x13, x0
+ST5(	st1		{v5.16b}, [OUT], x14		)
+	st1		{v6.16b}, [OUT], x15
+	st1		{v7.16b}, [OUT], x16
+	add		x13, x13, OUT
 	st1		{v9.16b}, [x13]		// overlapping stores
-	st1		{v8.16b}, [x0]
-	b		.Lctrout
+	st1		{v8.16b}, [OUT]
+	b		.Lctrout\xctr
 
-.Lctrtail1x:
-	sub		x7, x6, #16
-	csel		x6, x6, x7, eq
-	add		x1, x1, x6
-	add		x0, x0, x6
-	ld1		{v5.16b}, [x1]
-	ld1		{v6.16b}, [x0]
+.Lctrtail1x\xctr:
+	/*
+	 * Handle <= 16 bytes of plaintext
+	 *
+	 * This code always reads and writes 16 bytes.  To avoid out of bounds
+	 * accesses, XCTR and CTR modes must use a temporary buffer when
+	 * encrypting/decrypting less than 16 bytes.
+	 *
+	 * This code is unusual in that it loads the input and stores the output
+	 * relative to the end of the buffers rather than relative to the start.
+	 * This causes unusual behaviour when encrypting/decrypting less than 16
+	 * bytes; the end of the data is expected to be at the end of the
+	 * temporary buffer rather than the start of the data being at the start
+	 * of the temporary buffer.
+	 */
+	sub		x8, x7, #16
+	csel		x7, x7, x8, eq
+	add		IN, IN, x7
+	add		OUT, OUT, x7
+	ld1		{v5.16b}, [IN]
+	ld1		{v6.16b}, [OUT]
 ST5(	mov		v3.16b, v4.16b			)
-	encrypt_block	v3, w3, x2, x8, w7
-	ld1		{v10.16b-v11.16b}, [x12]
+	encrypt_block	v3, ROUNDS_W, KEY, x8, w7
+	ld1		{v10.16b-v11.16b}, [x9]
 	tbl		v3.16b, {v3.16b}, v10.16b
 	sshr		v11.16b, v11.16b, #7
 	eor		v5.16b, v5.16b, v3.16b
 	bif		v5.16b, v6.16b, v11.16b
-	st1		{v5.16b}, [x0]
-	b		.Lctrout
+	st1		{v5.16b}, [OUT]
+	b		.Lctrout\xctr
+
+	// Arguments
+	.unreq OUT
+	.unreq IN
+	.unreq KEY
+	.unreq ROUNDS_W
+	.unreq BYTES_W
+	.unreq IV
+	.unreq BYTE_CTR_W	// XCTR only
+	// Intermediate values
+	.unreq CTR_W		// XCTR only
+	.unreq CTR		// XCTR only
+	.unreq IV_PART
+	.unreq BLOCKS
+	.unreq BLOCKS_W
+.endm
+
+	/*
+	 * aes_ctr_encrypt(u8 out[], u8 const in[], u32 const rk[], int rounds,
+	 *		   int bytes, u8 ctr[])
+	 *
+	 * The input and output buffers must always be at least 16 bytes even if
+	 * encrypting/decrypting less than 16 bytes.  Otherwise out of bounds
+	 * accesses will occur.  The data to be encrypted/decrypted is expected
+	 * to be at the end of this 16-byte temporary buffer rather than the
+	 * start.
+	 */
+
+AES_FUNC_START(aes_ctr_encrypt)
+	ctr_encrypt 0
 AES_FUNC_END(aes_ctr_encrypt)
 
+	/*
+	 * aes_xctr_encrypt(u8 out[], u8 const in[], u32 const rk[], int rounds,
+	 *		    int bytes, u8 const iv[], int byte_ctr)
+	 *
+	 * The input and output buffers must always be at least 16 bytes even if
+	 * encrypting/decrypting less than 16 bytes.  Otherwise out of bounds
+	 * accesses will occur.  The data to be encrypted/decrypted is expected
+	 * to be at the end of this 16-byte temporary buffer rather than the
+	 * start.
+	 */
+
+AES_FUNC_START(aes_xctr_encrypt)
+	ctr_encrypt 1
+AES_FUNC_END(aes_xctr_encrypt)
+
 
 	/*
 	 * aes_xts_encrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds,
diff --git a/arch/arm64/crypto/polyval-ce-core.S b/arch/arm64/crypto/polyval-ce-core.S
new file mode 100644
index 0000000000000..b5326540d2e34
--- /dev/null
+++ b/arch/arm64/crypto/polyval-ce-core.S
@@ -0,0 +1,361 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Implementation of POLYVAL using ARMv8 Crypto Extensions.
+ *
+ * Copyright 2021 Google LLC
+ */
+/*
+ * This is an efficient implementation of POLYVAL using ARMv8 Crypto Extensions.
+ * It works on 8 blocks at a time, by precomputing the first 8 key powers h^8,
+ * ..., h^1 in the POLYVAL finite field. This precomputation allows us to split
+ * finite field multiplication into two steps.
+ *
+ * In the first step, we consider h^i, m_i as normal polynomials of degree less
+ * than 128. We then compute p(x) = h^8m_0 + ... + h^1m_7 where multiplication
+ * is simply polynomial multiplication.
+ *
+ * In the second step, we compute the reduction of p(x) modulo the finite field
+ * modulus g(x) = x^128 + x^127 + x^126 + x^121 + 1.
+ *
+ * This two step process is equivalent to computing h^8m_0 + ... + h^1m_7 where
+ * multiplication is finite field multiplication. The advantage is that the
+ * two-step process only requires 1 finite field reduction for every 8
+ * polynomial multiplications. Further parallelism is gained by interleaving the
+ * multiplications and polynomial reductions.
+ */
+
+#include <linux/linkage.h>
+#define STRIDE_BLOCKS 8
+
+KEY_POWERS	.req	x0
+MSG		.req	x1
+BLOCKS_LEFT	.req	x2
+ACCUMULATOR	.req	x3
+KEY_START	.req	x10
+EXTRA_BYTES	.req	x11
+TMP	.req	x13
+
+M0	.req	v0
+M1	.req	v1
+M2	.req	v2
+M3	.req	v3
+M4	.req	v4
+M5	.req	v5
+M6	.req	v6
+M7	.req	v7
+KEY8	.req	v8
+KEY7	.req	v9
+KEY6	.req	v10
+KEY5	.req	v11
+KEY4	.req	v12
+KEY3	.req	v13
+KEY2	.req	v14
+KEY1	.req	v15
+PL	.req	v16
+PH	.req	v17
+TMP_V	.req	v18
+LO	.req	v20
+MI	.req	v21
+HI	.req	v22
+SUM	.req	v23
+GSTAR	.req	v24
+
+	.text
+
+	.arch	armv8-a+crypto
+	.align	4
+
+.Lgstar:
+	.quad	0xc200000000000000, 0xc200000000000000
+
+/*
+ * Computes the product of two 128-bit polynomials in X and Y and XORs the
+ * components of the 256-bit product into LO, MI, HI.
+ *
+ * Given:
+ *  X = [X_1 : X_0]
+ *  Y = [Y_1 : Y_0]
+ *
+ * We compute:
+ *  LO += X_0 * Y_0
+ *  MI += (X_0 + X_1) * (Y_0 + Y_1)
+ *  HI += X_1 * Y_1
+ *
+ * Later, the 256-bit result can be extracted as:
+ *   [HI_1 : HI_0 + HI_1 + MI_1 + LO_1 : LO_1 + HI_0 + MI_0 + LO_0 : LO_0]
+ * This step is done when computing the polynomial reduction for efficiency
+ * reasons.
+ *
+ * Karatsuba multiplication is used instead of Schoolbook multiplication because
+ * it was found to be slightly faster on ARM64 CPUs.
+ *
+ */
+.macro karatsuba1 X Y
+	X .req \X
+	Y .req \Y
+	ext	v25.16b, X.16b, X.16b, #8
+	ext	v26.16b, Y.16b, Y.16b, #8
+	eor	v25.16b, v25.16b, X.16b
+	eor	v26.16b, v26.16b, Y.16b
+	pmull2	v28.1q, X.2d, Y.2d
+	pmull	v29.1q, X.1d, Y.1d
+	pmull	v27.1q, v25.1d, v26.1d
+	eor	HI.16b, HI.16b, v28.16b
+	eor	LO.16b, LO.16b, v29.16b
+	eor	MI.16b, MI.16b, v27.16b
+	.unreq X
+	.unreq Y
+.endm
+
+/*
+ * Same as karatsuba1, except overwrites HI, LO, MI rather than XORing into
+ * them.
+ */
+.macro karatsuba1_store X Y
+	X .req \X
+	Y .req \Y
+	ext	v25.16b, X.16b, X.16b, #8
+	ext	v26.16b, Y.16b, Y.16b, #8
+	eor	v25.16b, v25.16b, X.16b
+	eor	v26.16b, v26.16b, Y.16b
+	pmull2	HI.1q, X.2d, Y.2d
+	pmull	LO.1q, X.1d, Y.1d
+	pmull	MI.1q, v25.1d, v26.1d
+	.unreq X
+	.unreq Y
+.endm
+
+/*
+ * Computes the 256-bit polynomial represented by LO, HI, MI. Stores
+ * the result in PL, PH.
+ * [PH : PL] =
+ *   [HI_1 : HI_1 + HI_0 + MI_1 + LO_1 : HI_0 + MI_0 + LO_1 + LO_0 : LO_0]
+ */
+.macro karatsuba2
+	// v4 = [HI_1 + MI_1 : HI_0 + MI_0]
+	eor	v4.16b, HI.16b, MI.16b
+	// v4 = [HI_1 + MI_1 + LO_1 : HI_0 + MI_0 + LO_0]
+	eor	v4.16b, v4.16b, LO.16b
+	// v5 = [HI_0 : LO_1]
+	ext	v5.16b, LO.16b, HI.16b, #8
+	// v4 = [HI_1 + HI_0 + MI_1 + LO_1 : HI_0 + MI_0 + LO_1 + LO_0]
+	eor	v4.16b, v4.16b, v5.16b
+	// HI = [HI_0 : HI_1]
+	ext	HI.16b, HI.16b, HI.16b, #8
+	// LO = [LO_0 : LO_1]
+	ext	LO.16b, LO.16b, LO.16b, #8
+	// PH = [HI_1 : HI_1 + HI_0 + MI_1 + LO_1]
+	ext	PH.16b, v4.16b, HI.16b, #8
+	// PL = [HI_0 + MI_0 + LO_1 + LO_0 : LO_0]
+	ext	PL.16b, LO.16b, v4.16b, #8
+.endm
+
+/*
+ * Computes the 128-bit reduction of PH : PL. Stores the result in dest.
+ *
+ * This macro computes p(x) mod g(x) where p(x) is in Montgomery form and g(x) =
+ * x^128 + x^127 + x^126 + x^121 + 1.
+ *
+ * We have a 256-bit polynomial PH : PL = P_3 : P_2 : P_1 : P_0 that is the
+ * product of two 128-bit polynomials in Montgomery form.  We need to reduce it
+ * mod g(x).  Also, since polynomials in Montgomery form have an "extra" factor
+ * of x^128, this product has two extra factors of x^128.  To get it back into
+ * Montgomery form, we need to remove one of these factors by dividing by x^128.
+ *
+ * To accomplish both of these goals, we add multiples of g(x) that cancel out
+ * the low 128 bits P_1 : P_0, leaving just the high 128 bits. Since the low
+ * bits are zero, the polynomial division by x^128 can be done by right
+ * shifting.
+ *
+ * Since the only nonzero term in the low 64 bits of g(x) is the constant term,
+ * the multiple of g(x) needed to cancel out P_0 is P_0 * g(x).  The CPU can
+ * only do 64x64 bit multiplications, so split P_0 * g(x) into x^128 * P_0 +
+ * x^64 * g*(x) * P_0 + P_0, where g*(x) is bits 64-127 of g(x).  Adding this to
+ * the original polynomial gives P_3 : P_2 + P_0 + T_1 : P_1 + T_0 : 0, where T
+ * = T_1 : T_0 = g*(x) * P_0.  Thus, bits 0-63 got "folded" into bits 64-191.
+ *
+ * Repeating this same process on the next 64 bits "folds" bits 64-127 into bits
+ * 128-255, giving the answer in bits 128-255. This time, we need to cancel P_1
+ * + T_0 in bits 64-127. The multiple of g(x) required is (P_1 + T_0) * g(x) *
+ * x^64. Adding this to our previous computation gives P_3 + P_1 + T_0 + V_1 :
+ * P_2 + P_0 + T_1 + V_0 : 0 : 0, where V = V_1 : V_0 = g*(x) * (P_1 + T_0).
+ *
+ * So our final computation is:
+ *   T = T_1 : T_0 = g*(x) * P_0
+ *   V = V_1 : V_0 = g*(x) * (P_1 + T_0)
+ *   p(x) / x^{128} mod g(x) = P_3 + P_1 + T_0 + V_1 : P_2 + P_0 + T_1 + V_0
+ *
+ * The implementation below saves a XOR instruction by computing P_1 + T_0 : P_0
+ * + T_1 and XORing into dest, rather than separately XORing P_1 : P_0 and T_0 :
+ * T_1 into dest.  This allows us to reuse P_1 + T_0 when computing V.
+ */
+.macro montgomery_reduction dest
+	DEST .req \dest
+	// TMP_V = T_1 : T_0 = P_0 * g*(x)
+	pmull	TMP_V.1q, PL.1d, GSTAR.1d
+	// TMP_V = T_0 : T_1
+	ext	TMP_V.16b, TMP_V.16b, TMP_V.16b, #8
+	// TMP_V = P_1 + T_0 : P_0 + T_1
+	eor	TMP_V.16b, PL.16b, TMP_V.16b
+	// PH = P_3 + P_1 + T_0 : P_2 + P_0 + T_1
+	eor	PH.16b, PH.16b, TMP_V.16b
+	// TMP_V = V_1 : V_0 = (P_1 + T_0) * g*(x)
+	pmull2	TMP_V.1q, TMP_V.2d, GSTAR.2d
+	eor	DEST.16b, PH.16b, TMP_V.16b
+	.unreq DEST
+.endm
+
+/*
+ * Compute Polyval on 8 blocks.
+ *
+ * If reduce is set, also computes the Montgomery reduction of the
+ * previous full_stride call and XORs with the first message block.
+ * (m_0 + REDUCE(PL, PH))h^8 + ... + m_7h^1.
+ * I.e., the first multiplication uses m_0 + REDUCE(PL, PH) instead of m_0.
+ *
+ * Sets PL, PH.
+ */
+.macro full_stride reduce
+	eor		LO.16b, LO.16b, LO.16b
+	eor		MI.16b, MI.16b, MI.16b
+	eor		HI.16b, HI.16b, HI.16b
+
+	ld1		{M0.16b, M1.16b, M2.16b, M3.16b}, [MSG], #64
+	ld1		{M4.16b, M5.16b, M6.16b, M7.16b}, [MSG], #64
+
+	karatsuba1 M7 KEY1
+	.if \reduce
+	pmull	TMP_V.1q, PL.1d, GSTAR.1d
+	.endif
+
+	karatsuba1 M6 KEY2
+	.if \reduce
+	ext	TMP_V.16b, TMP_V.16b, TMP_V.16b, #8
+	.endif
+
+	karatsuba1 M5 KEY3
+	.if \reduce
+	eor	TMP_V.16b, PL.16b, TMP_V.16b
+	.endif
+
+	karatsuba1 M4 KEY4
+	.if \reduce
+	eor	PH.16b, PH.16b, TMP_V.16b
+	.endif
+
+	karatsuba1 M3 KEY5
+	.if \reduce
+	pmull2	TMP_V.1q, TMP_V.2d, GSTAR.2d
+	.endif
+
+	karatsuba1 M2 KEY6
+	.if \reduce
+	eor	SUM.16b, PH.16b, TMP_V.16b
+	.endif
+
+	karatsuba1 M1 KEY7
+	eor	M0.16b, M0.16b, SUM.16b
+
+	karatsuba1 M0 KEY8
+	karatsuba2
+.endm
+
+/*
+ * Handle any extra blocks after full_stride loop.
+ */
+.macro partial_stride
+	add	KEY_POWERS, KEY_START, #(STRIDE_BLOCKS << 4)
+	sub	KEY_POWERS, KEY_POWERS, BLOCKS_LEFT, lsl #4
+	ld1	{KEY1.16b}, [KEY_POWERS], #16
+
+	ld1	{TMP_V.16b}, [MSG], #16
+	eor	SUM.16b, SUM.16b, TMP_V.16b
+	karatsuba1_store KEY1 SUM
+	sub	BLOCKS_LEFT, BLOCKS_LEFT, #1
+
+	tst	BLOCKS_LEFT, #4
+	beq	.Lpartial4BlocksDone
+	ld1	{M0.16b, M1.16b,  M2.16b, M3.16b}, [MSG], #64
+	ld1	{KEY8.16b, KEY7.16b, KEY6.16b,	KEY5.16b}, [KEY_POWERS], #64
+	karatsuba1 M0 KEY8
+	karatsuba1 M1 KEY7
+	karatsuba1 M2 KEY6
+	karatsuba1 M3 KEY5
+.Lpartial4BlocksDone:
+	tst	BLOCKS_LEFT, #2
+	beq	.Lpartial2BlocksDone
+	ld1	{M0.16b, M1.16b}, [MSG], #32
+	ld1	{KEY8.16b, KEY7.16b}, [KEY_POWERS], #32
+	karatsuba1 M0 KEY8
+	karatsuba1 M1 KEY7
+.Lpartial2BlocksDone:
+	tst	BLOCKS_LEFT, #1
+	beq	.LpartialDone
+	ld1	{M0.16b}, [MSG], #16
+	ld1	{KEY8.16b}, [KEY_POWERS], #16
+	karatsuba1 M0 KEY8
+.LpartialDone:
+	karatsuba2
+	montgomery_reduction SUM
+.endm
+
+/*
+ * Perform Montgomery multiplication in GF(2^128) and store result in op1.
+ *
+ * Computes op1*op2*x^{-128} mod x^128 + x^127 + x^126 + x^121 + 1.
+ * If op1, op2 are in Montgomery form, this computes the Montgomery
+ * form of op1*op2.
+ *
+ * void pmull_polyval_mul(u8 *op1, const u8 *op2);
+ */
+SYM_FUNC_START(pmull_polyval_mul)
+	adr	TMP, .Lgstar
+	ld1	{GSTAR.2d}, [TMP]
+	ld1	{v0.16b}, [x0]
+	ld1	{v1.16b}, [x1]
+	karatsuba1_store v0 v1
+	karatsuba2
+	montgomery_reduction SUM
+	st1	{SUM.16b}, [x0]
+	ret
+SYM_FUNC_END(pmull_polyval_mul)
+
+/*
+ * Perform polynomial evaluation as specified by POLYVAL.  This computes:
+ *	h^n * accumulator + h^n * m_0 + ... + h^1 * m_{n-1}
+ * where n=nblocks, h is the hash key, and m_i are the message blocks.
+ *
+ * x0 - pointer to precomputed key powers h^8 ... h^1
+ * x1 - pointer to message blocks
+ * x2 - number of blocks to hash
+ * x3 - pointer to accumulator
+ *
+ * void pmull_polyval_update(const struct polyval_tfm_ctx *keys, const u8 *in,
+ *			     size_t nblocks, u8 *accumulator);
+ */
+SYM_FUNC_START(pmull_polyval_update)
+	adr	TMP, .Lgstar
+	mov	KEY_START, KEY_POWERS
+	ld1	{GSTAR.2d}, [TMP]
+	ld1	{SUM.16b}, [ACCUMULATOR]
+	subs	BLOCKS_LEFT, BLOCKS_LEFT, #STRIDE_BLOCKS
+	blt .LstrideLoopExit
+	ld1	{KEY8.16b, KEY7.16b, KEY6.16b, KEY5.16b}, [KEY_POWERS], #64
+	ld1	{KEY4.16b, KEY3.16b, KEY2.16b, KEY1.16b}, [KEY_POWERS], #64
+	full_stride 0
+	subs	BLOCKS_LEFT, BLOCKS_LEFT, #STRIDE_BLOCKS
+	blt .LstrideLoopExitReduce
+.LstrideLoop:
+	full_stride 1
+	subs	BLOCKS_LEFT, BLOCKS_LEFT, #STRIDE_BLOCKS
+	bge	.LstrideLoop
+.LstrideLoopExitReduce:
+	montgomery_reduction SUM
+.LstrideLoopExit:
+	adds	BLOCKS_LEFT, BLOCKS_LEFT, #STRIDE_BLOCKS
+	beq	.LskipPartial
+	partial_stride
+.LskipPartial:
+	st1	{SUM.16b}, [ACCUMULATOR]
+	ret
+SYM_FUNC_END(pmull_polyval_update)
diff --git a/arch/arm64/crypto/polyval-ce-glue.c b/arch/arm64/crypto/polyval-ce-glue.c
new file mode 100644
index 0000000000000..0a3b5718df855
--- /dev/null
+++ b/arch/arm64/crypto/polyval-ce-glue.c
@@ -0,0 +1,191 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Glue code for POLYVAL using ARMv8 Crypto Extensions
+ *
+ * Copyright (c) 2007 Nokia Siemens Networks - Mikko Herranen <mh1@iki.fi>
+ * Copyright (c) 2009 Intel Corp.
+ *   Author: Huang Ying <ying.huang@intel.com>
+ * Copyright 2021 Google LLC
+ */
+
+/*
+ * Glue code based on ghash-clmulni-intel_glue.c.
+ *
+ * This implementation of POLYVAL uses Montgomery multiplication accelerated by
+ * ARMv8 Crypto Extensions instructions to implement the finite field operations.
+ */
+
+#include <crypto/algapi.h>
+#include <crypto/internal/hash.h>
+#include <crypto/internal/simd.h>
+#include <crypto/polyval.h>
+#include <linux/crypto.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/cpufeature.h>
+#include <asm/neon.h>
+#include <asm/simd.h>
+
+#define NUM_KEY_POWERS	8
+
+struct polyval_tfm_ctx {
+	/*
+	 * These powers must be in the order h^8, ..., h^1.
+	 */
+	u8 key_powers[NUM_KEY_POWERS][POLYVAL_BLOCK_SIZE];
+};
+
+struct polyval_desc_ctx {
+	u8 buffer[POLYVAL_BLOCK_SIZE];
+	u32 bytes;
+};
+
+asmlinkage void pmull_polyval_update(const struct polyval_tfm_ctx *keys,
+	const u8 *in, size_t nblocks, u8 *accumulator);
+asmlinkage void pmull_polyval_mul(u8 *op1, const u8 *op2);
+
+static void internal_polyval_update(const struct polyval_tfm_ctx *keys,
+	const u8 *in, size_t nblocks, u8 *accumulator)
+{
+	if (likely(crypto_simd_usable())) {
+		kernel_neon_begin();
+		pmull_polyval_update(keys, in, nblocks, accumulator);
+		kernel_neon_end();
+	} else {
+		polyval_update_non4k(keys->key_powers[NUM_KEY_POWERS-1], in,
+			nblocks, accumulator);
+	}
+}
+
+static void internal_polyval_mul(u8 *op1, const u8 *op2)
+{
+	if (likely(crypto_simd_usable())) {
+		kernel_neon_begin();
+		pmull_polyval_mul(op1, op2);
+		kernel_neon_end();
+	} else {
+		polyval_mul_non4k(op1, op2);
+	}
+}
+
+static int polyval_arm64_setkey(struct crypto_shash *tfm,
+			const u8 *key, unsigned int keylen)
+{
+	struct polyval_tfm_ctx *tctx = crypto_shash_ctx(tfm);
+	int i;
+
+	if (keylen != POLYVAL_BLOCK_SIZE)
+		return -EINVAL;
+
+	memcpy(tctx->key_powers[NUM_KEY_POWERS-1], key, POLYVAL_BLOCK_SIZE);
+
+	for (i = NUM_KEY_POWERS-2; i >= 0; i--) {
+		memcpy(tctx->key_powers[i], key, POLYVAL_BLOCK_SIZE);
+		internal_polyval_mul(tctx->key_powers[i],
+				     tctx->key_powers[i+1]);
+	}
+
+	return 0;
+}
+
+static int polyval_arm64_init(struct shash_desc *desc)
+{
+	struct polyval_desc_ctx *dctx = shash_desc_ctx(desc);
+
+	memset(dctx, 0, sizeof(*dctx));
+
+	return 0;
+}
+
+static int polyval_arm64_update(struct shash_desc *desc,
+			 const u8 *src, unsigned int srclen)
+{
+	struct polyval_desc_ctx *dctx = shash_desc_ctx(desc);
+	const struct polyval_tfm_ctx *tctx = crypto_shash_ctx(desc->tfm);
+	u8 *pos;
+	unsigned int nblocks;
+	unsigned int n;
+
+	if (dctx->bytes) {
+		n = min(srclen, dctx->bytes);
+		pos = dctx->buffer + POLYVAL_BLOCK_SIZE - dctx->bytes;
+
+		dctx->bytes -= n;
+		srclen -= n;
+
+		while (n--)
+			*pos++ ^= *src++;
+
+		if (!dctx->bytes)
+			internal_polyval_mul(dctx->buffer,
+					    tctx->key_powers[NUM_KEY_POWERS-1]);
+	}
+
+	while (srclen >= POLYVAL_BLOCK_SIZE) {
+		/* allow rescheduling every 4K bytes */
+		nblocks = min(srclen, 4096U) / POLYVAL_BLOCK_SIZE;
+		internal_polyval_update(tctx, src, nblocks, dctx->buffer);
+		srclen -= nblocks * POLYVAL_BLOCK_SIZE;
+		src += nblocks * POLYVAL_BLOCK_SIZE;
+	}
+
+	if (srclen) {
+		dctx->bytes = POLYVAL_BLOCK_SIZE - srclen;
+		pos = dctx->buffer;
+		while (srclen--)
+			*pos++ ^= *src++;
+	}
+
+	return 0;
+}
+
+static int polyval_arm64_final(struct shash_desc *desc, u8 *dst)
+{
+	struct polyval_desc_ctx *dctx = shash_desc_ctx(desc);
+	const struct polyval_tfm_ctx *tctx = crypto_shash_ctx(desc->tfm);
+
+	if (dctx->bytes) {
+		internal_polyval_mul(dctx->buffer,
+				     tctx->key_powers[NUM_KEY_POWERS-1]);
+	}
+
+	memcpy(dst, dctx->buffer, POLYVAL_BLOCK_SIZE);
+
+	return 0;
+}
+
+static struct shash_alg polyval_alg = {
+	.digestsize	= POLYVAL_DIGEST_SIZE,
+	.init		= polyval_arm64_init,
+	.update		= polyval_arm64_update,
+	.final		= polyval_arm64_final,
+	.setkey		= polyval_arm64_setkey,
+	.descsize	= sizeof(struct polyval_desc_ctx),
+	.base		= {
+		.cra_name		= "polyval",
+		.cra_driver_name	= "polyval-ce",
+		.cra_priority		= 200,
+		.cra_blocksize		= POLYVAL_BLOCK_SIZE,
+		.cra_ctxsize		= sizeof(struct polyval_tfm_ctx),
+		.cra_module		= THIS_MODULE,
+	},
+};
+
+static int __init polyval_ce_mod_init(void)
+{
+	return crypto_register_shash(&polyval_alg);
+}
+
+static void __exit polyval_ce_mod_exit(void)
+{
+	crypto_unregister_shash(&polyval_alg);
+}
+
+module_cpu_feature_match(PMULL, polyval_ce_mod_init)
+module_exit(polyval_ce_mod_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("POLYVAL hash function accelerated by ARMv8 Crypto Extensions");
+MODULE_ALIAS_CRYPTO("polyval");
+MODULE_ALIAS_CRYPTO("polyval-ce");
diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile
index 2831685adf6fb..04d07ab744b2e 100644
--- a/arch/x86/crypto/Makefile
+++ b/arch/x86/crypto/Makefile
@@ -61,14 +61,15 @@ sha256-ssse3-$(CONFIG_AS_SHA256_NI) += sha256_ni_asm.o
 obj-$(CONFIG_CRYPTO_SHA512_SSSE3) += sha512-ssse3.o
 sha512-ssse3-y := sha512-ssse3-asm.o sha512-avx-asm.o sha512-avx2-asm.o sha512_ssse3_glue.o
 
-obj-$(CONFIG_CRYPTO_BLAKE2S_X86) += blake2s-x86_64.o
-blake2s-x86_64-y := blake2s-shash.o
-obj-$(if $(CONFIG_CRYPTO_BLAKE2S_X86),y) += libblake2s-x86_64.o
+obj-$(CONFIG_CRYPTO_BLAKE2S_X86) += libblake2s-x86_64.o
 libblake2s-x86_64-y := blake2s-core.o blake2s-glue.o
 
 obj-$(CONFIG_CRYPTO_GHASH_CLMUL_NI_INTEL) += ghash-clmulni-intel.o
 ghash-clmulni-intel-y := ghash-clmulni-intel_asm.o ghash-clmulni-intel_glue.o
 
+obj-$(CONFIG_CRYPTO_POLYVAL_CLMUL_NI) += polyval-clmulni.o
+polyval-clmulni-y := polyval-clmulni_asm.o polyval-clmulni_glue.o
+
 obj-$(CONFIG_CRYPTO_CRC32C_INTEL) += crc32c-intel.o
 crc32c-intel-y := crc32c-intel_glue.o
 crc32c-intel-$(CONFIG_64BIT) += crc32c-pcl-intel-asm_64.o
diff --git a/arch/x86/crypto/aes_ctrby8_avx-x86_64.S b/arch/x86/crypto/aes_ctrby8_avx-x86_64.S
index 43852ba6e19c7..2402b9418cd7a 100644
--- a/arch/x86/crypto/aes_ctrby8_avx-x86_64.S
+++ b/arch/x86/crypto/aes_ctrby8_avx-x86_64.S
@@ -23,6 +23,11 @@
 
 #define VMOVDQ		vmovdqu
 
+/*
+ * Note: the "x" prefix in these aliases means "this is an xmm register".  The
+ * alias prefixes have no relation to XCTR where the "X" prefix means "XOR
+ * counter".
+ */
 #define xdata0		%xmm0
 #define xdata1		%xmm1
 #define xdata2		%xmm2
@@ -31,8 +36,10 @@
 #define xdata5		%xmm5
 #define xdata6		%xmm6
 #define xdata7		%xmm7
-#define xcounter	%xmm8
-#define xbyteswap	%xmm9
+#define xcounter	%xmm8	// CTR mode only
+#define xiv		%xmm8	// XCTR mode only
+#define xbyteswap	%xmm9	// CTR mode only
+#define xtmp		%xmm9	// XCTR mode only
 #define xkey0		%xmm10
 #define xkey4		%xmm11
 #define xkey8		%xmm12
@@ -45,7 +52,7 @@
 #define p_keys		%rdx
 #define p_out		%rcx
 #define num_bytes	%r8
-
+#define counter		%r9	// XCTR mode only
 #define tmp		%r10
 #define	DDQ_DATA	0
 #define	XDATA		1
@@ -102,7 +109,7 @@ ddq_add_8:
  * do_aes num_in_par load_keys key_len
  * This increments p_in, but not p_out
  */
-.macro do_aes b, k, key_len
+.macro do_aes b, k, key_len, xctr
 	.set by, \b
 	.set load_keys, \k
 	.set klen, \key_len
@@ -111,29 +118,48 @@ ddq_add_8:
 		vmovdqa	0*16(p_keys), xkey0
 	.endif
 
-	vpshufb	xbyteswap, xcounter, xdata0
-
-	.set i, 1
-	.rept (by - 1)
-		club XDATA, i
-		vpaddq	(ddq_add_1 + 16 * (i - 1))(%rip), xcounter, var_xdata
-		vptest	ddq_low_msk(%rip), var_xdata
-		jnz 1f
-		vpaddq	ddq_high_add_1(%rip), var_xdata, var_xdata
-		vpaddq	ddq_high_add_1(%rip), xcounter, xcounter
-		1:
-		vpshufb	xbyteswap, var_xdata, var_xdata
-		.set i, (i +1)
-	.endr
+	.if \xctr
+		movq counter, xtmp
+		.set i, 0
+		.rept (by)
+			club XDATA, i
+			vpaddq	(ddq_add_1 + 16 * i)(%rip), xtmp, var_xdata
+			.set i, (i +1)
+		.endr
+		.set i, 0
+		.rept (by)
+			club	XDATA, i
+			vpxor	xiv, var_xdata, var_xdata
+			.set i, (i +1)
+		.endr
+	.else
+		vpshufb	xbyteswap, xcounter, xdata0
+		.set i, 1
+		.rept (by - 1)
+			club XDATA, i
+			vpaddq	(ddq_add_1 + 16 * (i - 1))(%rip), xcounter, var_xdata
+			vptest	ddq_low_msk(%rip), var_xdata
+			jnz 1f
+			vpaddq	ddq_high_add_1(%rip), var_xdata, var_xdata
+			vpaddq	ddq_high_add_1(%rip), xcounter, xcounter
+			1:
+			vpshufb	xbyteswap, var_xdata, var_xdata
+			.set i, (i +1)
+		.endr
+	.endif
 
 	vmovdqa	1*16(p_keys), xkeyA
 
 	vpxor	xkey0, xdata0, xdata0
-	vpaddq	(ddq_add_1 + 16 * (by - 1))(%rip), xcounter, xcounter
-	vptest	ddq_low_msk(%rip), xcounter
-	jnz	1f
-	vpaddq	ddq_high_add_1(%rip), xcounter, xcounter
-	1:
+	.if \xctr
+		add $by, counter
+	.else
+		vpaddq	(ddq_add_1 + 16 * (by - 1))(%rip), xcounter, xcounter
+		vptest	ddq_low_msk(%rip), xcounter
+		jnz	1f
+		vpaddq	ddq_high_add_1(%rip), xcounter, xcounter
+		1:
+	.endif
 
 	.set i, 1
 	.rept (by - 1)
@@ -371,94 +397,99 @@ ddq_add_8:
 	.endr
 .endm
 
-.macro do_aes_load val, key_len
-	do_aes \val, 1, \key_len
+.macro do_aes_load val, key_len, xctr
+	do_aes \val, 1, \key_len, \xctr
 .endm
 
-.macro do_aes_noload val, key_len
-	do_aes \val, 0, \key_len
+.macro do_aes_noload val, key_len, xctr
+	do_aes \val, 0, \key_len, \xctr
 .endm
 
 /* main body of aes ctr load */
 
-.macro do_aes_ctrmain key_len
+.macro do_aes_ctrmain key_len, xctr
 	cmp	$16, num_bytes
-	jb	.Ldo_return2\key_len
+	jb	.Ldo_return2\xctr\key_len
 
-	vmovdqa	byteswap_const(%rip), xbyteswap
-	vmovdqu	(p_iv), xcounter
-	vpshufb	xbyteswap, xcounter, xcounter
+	.if \xctr
+		shr	$4, counter
+		vmovdqu	(p_iv), xiv
+	.else
+		vmovdqa	byteswap_const(%rip), xbyteswap
+		vmovdqu	(p_iv), xcounter
+		vpshufb	xbyteswap, xcounter, xcounter
+	.endif
 
 	mov	num_bytes, tmp
 	and	$(7*16), tmp
-	jz	.Lmult_of_8_blks\key_len
+	jz	.Lmult_of_8_blks\xctr\key_len
 
 	/* 1 <= tmp <= 7 */
 	cmp	$(4*16), tmp
-	jg	.Lgt4\key_len
-	je	.Leq4\key_len
+	jg	.Lgt4\xctr\key_len
+	je	.Leq4\xctr\key_len
 
-.Llt4\key_len:
+.Llt4\xctr\key_len:
 	cmp	$(2*16), tmp
-	jg	.Leq3\key_len
-	je	.Leq2\key_len
+	jg	.Leq3\xctr\key_len
+	je	.Leq2\xctr\key_len
 
-.Leq1\key_len:
-	do_aes_load	1, \key_len
+.Leq1\xctr\key_len:
+	do_aes_load	1, \key_len, \xctr
 	add	$(1*16), p_out
 	and	$(~7*16), num_bytes
-	jz	.Ldo_return2\key_len
-	jmp	.Lmain_loop2\key_len
+	jz	.Ldo_return2\xctr\key_len
+	jmp	.Lmain_loop2\xctr\key_len
 
-.Leq2\key_len:
-	do_aes_load	2, \key_len
+.Leq2\xctr\key_len:
+	do_aes_load	2, \key_len, \xctr
 	add	$(2*16), p_out
 	and	$(~7*16), num_bytes
-	jz	.Ldo_return2\key_len
-	jmp	.Lmain_loop2\key_len
+	jz	.Ldo_return2\xctr\key_len
+	jmp	.Lmain_loop2\xctr\key_len
 
 
-.Leq3\key_len:
-	do_aes_load	3, \key_len
+.Leq3\xctr\key_len:
+	do_aes_load	3, \key_len, \xctr
 	add	$(3*16), p_out
 	and	$(~7*16), num_bytes
-	jz	.Ldo_return2\key_len
-	jmp	.Lmain_loop2\key_len
+	jz	.Ldo_return2\xctr\key_len
+	jmp	.Lmain_loop2\xctr\key_len
 
-.Leq4\key_len:
-	do_aes_load	4, \key_len
+.Leq4\xctr\key_len:
+	do_aes_load	4, \key_len, \xctr
 	add	$(4*16), p_out
 	and	$(~7*16), num_bytes
-	jz	.Ldo_return2\key_len
-	jmp	.Lmain_loop2\key_len
+	jz	.Ldo_return2\xctr\key_len
+	jmp	.Lmain_loop2\xctr\key_len
 
-.Lgt4\key_len:
+.Lgt4\xctr\key_len:
 	cmp	$(6*16), tmp
-	jg	.Leq7\key_len
-	je	.Leq6\key_len
+	jg	.Leq7\xctr\key_len
+	je	.Leq6\xctr\key_len
 
-.Leq5\key_len:
-	do_aes_load	5, \key_len
+.Leq5\xctr\key_len:
+	do_aes_load	5, \key_len, \xctr
 	add	$(5*16), p_out
 	and	$(~7*16), num_bytes
-	jz	.Ldo_return2\key_len
-	jmp	.Lmain_loop2\key_len
+	jz	.Ldo_return2\xctr\key_len
+	jmp	.Lmain_loop2\xctr\key_len
 
-.Leq6\key_len:
-	do_aes_load	6, \key_len
+.Leq6\xctr\key_len:
+	do_aes_load	6, \key_len, \xctr
 	add	$(6*16), p_out
 	and	$(~7*16), num_bytes
-	jz	.Ldo_return2\key_len
-	jmp	.Lmain_loop2\key_len
+	jz	.Ldo_return2\xctr\key_len
+	jmp	.Lmain_loop2\xctr\key_len
 
-.Leq7\key_len:
-	do_aes_load	7, \key_len
+.Leq7\xctr\key_len:
+	do_aes_load	7, \key_len, \xctr
 	add	$(7*16), p_out
 	and	$(~7*16), num_bytes
-	jz	.Ldo_return2\key_len
-	jmp	.Lmain_loop2\key_len
+	jz	.Ldo_return2\xctr\key_len
+	jmp	.Lmain_loop2\xctr\key_len
 
-.Lmult_of_8_blks\key_len:
+.Lmult_of_8_blks\xctr\key_len:
 	.if (\key_len != KEY_128)
 		vmovdqa	0*16(p_keys), xkey0
 		vmovdqa	4*16(p_keys), xkey4
@@ -471,17 +502,19 @@ ddq_add_8:
 		vmovdqa	9*16(p_keys), xkey12
 	.endif
 .align 16
-.Lmain_loop2\key_len:
+.Lmain_loop2\xctr\key_len:
 	/* num_bytes is a multiple of 8 and >0 */
-	do_aes_noload	8, \key_len
+	do_aes_noload	8, \key_len, \xctr
 	add	$(8*16), p_out
 	sub	$(8*16), num_bytes
-	jne	.Lmain_loop2\key_len
+	jne	.Lmain_loop2\xctr\key_len
 
-.Ldo_return2\key_len:
-	/* return updated IV */
-	vpshufb	xbyteswap, xcounter, xcounter
-	vmovdqu	xcounter, (p_iv)
+.Ldo_return2\xctr\key_len:
+	.if !\xctr
+		/* return updated IV */
+		vpshufb	xbyteswap, xcounter, xcounter
+		vmovdqu	xcounter, (p_iv)
+	.endif
 	RET
 .endm
 
@@ -494,7 +527,7 @@ ddq_add_8:
  */
 SYM_FUNC_START(aes_ctr_enc_128_avx_by8)
 	/* call the aes main loop */
-	do_aes_ctrmain KEY_128
+	do_aes_ctrmain KEY_128 0
 
 SYM_FUNC_END(aes_ctr_enc_128_avx_by8)
 
@@ -507,7 +540,7 @@ SYM_FUNC_END(aes_ctr_enc_128_avx_by8)
  */
 SYM_FUNC_START(aes_ctr_enc_192_avx_by8)
 	/* call the aes main loop */
-	do_aes_ctrmain KEY_192
+	do_aes_ctrmain KEY_192 0
 
 SYM_FUNC_END(aes_ctr_enc_192_avx_by8)
 
@@ -520,6 +553,45 @@ SYM_FUNC_END(aes_ctr_enc_192_avx_by8)
  */
 SYM_FUNC_START(aes_ctr_enc_256_avx_by8)
 	/* call the aes main loop */
-	do_aes_ctrmain KEY_256
+	do_aes_ctrmain KEY_256 0
 
 SYM_FUNC_END(aes_ctr_enc_256_avx_by8)
+
+/*
+ * routine to do AES128 XCTR enc/decrypt "by8"
+ * XMM registers are clobbered.
+ * Saving/restoring must be done at a higher level
+ * aes_xctr_enc_128_avx_by8(const u8 *in, const u8 *iv, const void *keys,
+ * 	u8* out, unsigned int num_bytes, unsigned int byte_ctr)
+ */
+SYM_FUNC_START(aes_xctr_enc_128_avx_by8)
+	/* call the aes main loop */
+	do_aes_ctrmain KEY_128 1
+
+SYM_FUNC_END(aes_xctr_enc_128_avx_by8)
+
+/*
+ * routine to do AES192 XCTR enc/decrypt "by8"
+ * XMM registers are clobbered.
+ * Saving/restoring must be done at a higher level
+ * aes_xctr_enc_192_avx_by8(const u8 *in, const u8 *iv, const void *keys,
+ * 	u8* out, unsigned int num_bytes, unsigned int byte_ctr)
+ */
+SYM_FUNC_START(aes_xctr_enc_192_avx_by8)
+	/* call the aes main loop */
+	do_aes_ctrmain KEY_192 1
+
+SYM_FUNC_END(aes_xctr_enc_192_avx_by8)
+
+/*
+ * routine to do AES256 XCTR enc/decrypt "by8"
+ * XMM registers are clobbered.
+ * Saving/restoring must be done at a higher level
+ * aes_xctr_enc_256_avx_by8(const u8 *in, const u8 *iv, const void *keys,
+ * 	u8* out, unsigned int num_bytes, unsigned int byte_ctr)
+ */
+SYM_FUNC_START(aes_xctr_enc_256_avx_by8)
+	/* call the aes main loop */
+	do_aes_ctrmain KEY_256 1
+
+SYM_FUNC_END(aes_xctr_enc_256_avx_by8)
diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c
index 41901ba9d3a2c..a5b0cb3efeba5 100644
--- a/arch/x86/crypto/aesni-intel_glue.c
+++ b/arch/x86/crypto/aesni-intel_glue.c
@@ -135,6 +135,20 @@ asmlinkage void aes_ctr_enc_192_avx_by8(const u8 *in, u8 *iv,
 		void *keys, u8 *out, unsigned int num_bytes);
 asmlinkage void aes_ctr_enc_256_avx_by8(const u8 *in, u8 *iv,
 		void *keys, u8 *out, unsigned int num_bytes);
+
+
+asmlinkage void aes_xctr_enc_128_avx_by8(const u8 *in, const u8 *iv,
+	const void *keys, u8 *out, unsigned int num_bytes,
+	unsigned int byte_ctr);
+
+asmlinkage void aes_xctr_enc_192_avx_by8(const u8 *in, const u8 *iv,
+	const void *keys, u8 *out, unsigned int num_bytes,
+	unsigned int byte_ctr);
+
+asmlinkage void aes_xctr_enc_256_avx_by8(const u8 *in, const u8 *iv,
+	const void *keys, u8 *out, unsigned int num_bytes,
+	unsigned int byte_ctr);
+
 /*
  * asmlinkage void aesni_gcm_init_avx_gen2()
  * gcm_data *my_ctx_data, context data
@@ -527,6 +541,59 @@ static int ctr_crypt(struct skcipher_request *req)
 	return err;
 }
 
+static void aesni_xctr_enc_avx_tfm(struct crypto_aes_ctx *ctx, u8 *out,
+				   const u8 *in, unsigned int len, u8 *iv,
+				   unsigned int byte_ctr)
+{
+	if (ctx->key_length == AES_KEYSIZE_128)
+		aes_xctr_enc_128_avx_by8(in, iv, (void *)ctx, out, len,
+					 byte_ctr);
+	else if (ctx->key_length == AES_KEYSIZE_192)
+		aes_xctr_enc_192_avx_by8(in, iv, (void *)ctx, out, len,
+					 byte_ctr);
+	else
+		aes_xctr_enc_256_avx_by8(in, iv, (void *)ctx, out, len,
+					 byte_ctr);
+}
+
+static int xctr_crypt(struct skcipher_request *req)
+{
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct crypto_aes_ctx *ctx = aes_ctx(crypto_skcipher_ctx(tfm));
+	u8 keystream[AES_BLOCK_SIZE];
+	struct skcipher_walk walk;
+	unsigned int nbytes;
+	unsigned int byte_ctr = 0;
+	int err;
+	__le32 block[AES_BLOCK_SIZE / sizeof(__le32)];
+
+	err = skcipher_walk_virt(&walk, req, false);
+
+	while ((nbytes = walk.nbytes) > 0) {
+		kernel_fpu_begin();
+		if (nbytes & AES_BLOCK_MASK)
+			aesni_xctr_enc_avx_tfm(ctx, walk.dst.virt.addr,
+				walk.src.virt.addr, nbytes & AES_BLOCK_MASK,
+				walk.iv, byte_ctr);
+		nbytes &= ~AES_BLOCK_MASK;
+		byte_ctr += walk.nbytes - nbytes;
+
+		if (walk.nbytes == walk.total && nbytes > 0) {
+			memcpy(block, walk.iv, AES_BLOCK_SIZE);
+			block[0] ^= cpu_to_le32(1 + byte_ctr / AES_BLOCK_SIZE);
+			aesni_enc(ctx, keystream, (u8 *)block);
+			crypto_xor_cpy(walk.dst.virt.addr + walk.nbytes -
+				       nbytes, walk.src.virt.addr + walk.nbytes
+				       - nbytes, keystream, nbytes);
+			byte_ctr += nbytes;
+			nbytes = 0;
+		}
+		kernel_fpu_end();
+		err = skcipher_walk_done(&walk, nbytes);
+	}
+	return err;
+}
+
 static int
 rfc4106_set_hash_subkey(u8 *hash_subkey, const u8 *key, unsigned int key_len)
 {
@@ -1051,6 +1118,33 @@ static
 struct simd_skcipher_alg *aesni_simd_skciphers[ARRAY_SIZE(aesni_skciphers)];
 
 #ifdef CONFIG_X86_64
+/*
+ * XCTR does not have a non-AVX implementation, so it must be enabled
+ * conditionally.
+ */
+static struct skcipher_alg aesni_xctr = {
+	.base = {
+		.cra_name		= "__xctr(aes)",
+		.cra_driver_name	= "__xctr-aes-aesni",
+		.cra_priority		= 400,
+		.cra_flags		= CRYPTO_ALG_INTERNAL,
+		.cra_blocksize		= 1,
+		.cra_ctxsize		= CRYPTO_AES_CTX_SIZE,
+		.cra_module		= THIS_MODULE,
+	},
+	.min_keysize	= AES_MIN_KEY_SIZE,
+	.max_keysize	= AES_MAX_KEY_SIZE,
+	.ivsize		= AES_BLOCK_SIZE,
+	.chunksize	= AES_BLOCK_SIZE,
+	.setkey		= aesni_skcipher_setkey,
+	.encrypt	= xctr_crypt,
+	.decrypt	= xctr_crypt,
+};
+
+static struct simd_skcipher_alg *aesni_simd_xctr;
+#endif /* CONFIG_X86_64 */
+
+#ifdef CONFIG_X86_64
 static int generic_gcmaes_set_key(struct crypto_aead *aead, const u8 *key,
 				  unsigned int key_len)
 {
@@ -1163,7 +1257,7 @@ static int __init aesni_init(void)
 		static_call_update(aesni_ctr_enc_tfm, aesni_ctr_enc_avx_tfm);
 		pr_info("AES CTR mode by8 optimization enabled\n");
 	}
-#endif
+#endif /* CONFIG_X86_64 */
 
 	err = crypto_register_alg(&aesni_cipher_alg);
 	if (err)
@@ -1180,8 +1274,22 @@ static int __init aesni_init(void)
 	if (err)
 		goto unregister_skciphers;
 
+#ifdef CONFIG_X86_64
+	if (boot_cpu_has(X86_FEATURE_AVX))
+		err = simd_register_skciphers_compat(&aesni_xctr, 1,
+						     &aesni_simd_xctr);
+	if (err)
+		goto unregister_aeads;
+#endif /* CONFIG_X86_64 */
+
 	return 0;
 
+#ifdef CONFIG_X86_64
+unregister_aeads:
+	simd_unregister_aeads(aesni_aeads, ARRAY_SIZE(aesni_aeads),
+				aesni_simd_aeads);
+#endif /* CONFIG_X86_64 */
+
 unregister_skciphers:
 	simd_unregister_skciphers(aesni_skciphers, ARRAY_SIZE(aesni_skciphers),
 				  aesni_simd_skciphers);
@@ -1197,6 +1305,10 @@ static void __exit aesni_exit(void)
 	simd_unregister_skciphers(aesni_skciphers, ARRAY_SIZE(aesni_skciphers),
 				  aesni_simd_skciphers);
 	crypto_unregister_alg(&aesni_cipher_alg);
+#ifdef CONFIG_X86_64
+	if (boot_cpu_has(X86_FEATURE_AVX))
+		simd_unregister_skciphers(&aesni_xctr, 1, &aesni_simd_xctr);
+#endif /* CONFIG_X86_64 */
 }
 
 late_initcall(aesni_init);
diff --git a/arch/x86/crypto/blake2s-glue.c b/arch/x86/crypto/blake2s-glue.c
index 69853c13e8fb0..aaba212305288 100644
--- a/arch/x86/crypto/blake2s-glue.c
+++ b/arch/x86/crypto/blake2s-glue.c
@@ -4,7 +4,6 @@
  */
 
 #include <crypto/internal/blake2s.h>
-#include <crypto/internal/simd.h>
 
 #include <linux/types.h>
 #include <linux/jump_label.h>
@@ -33,7 +32,7 @@ void blake2s_compress(struct blake2s_state *state, const u8 *block,
 	/* SIMD disables preemption, so relax after processing each page. */
 	BUILD_BUG_ON(SZ_4K / BLAKE2S_BLOCK_SIZE < 8);
 
-	if (!static_branch_likely(&blake2s_use_ssse3) || !crypto_simd_usable()) {
+	if (!static_branch_likely(&blake2s_use_ssse3) || !may_use_simd()) {
 		blake2s_compress_generic(state, block, nblocks, inc);
 		return;
 	}
diff --git a/arch/x86/crypto/blake2s-shash.c b/arch/x86/crypto/blake2s-shash.c
deleted file mode 100644
index 59ae28abe35cc..0000000000000
--- a/arch/x86/crypto/blake2s-shash.c
+++ /dev/null
@@ -1,77 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0 OR MIT
-/*
- * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
- */
-
-#include <crypto/internal/blake2s.h>
-#include <crypto/internal/simd.h>
-#include <crypto/internal/hash.h>
-
-#include <linux/types.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/sizes.h>
-
-#include <asm/cpufeature.h>
-#include <asm/processor.h>
-
-static int crypto_blake2s_update_x86(struct shash_desc *desc,
-				     const u8 *in, unsigned int inlen)
-{
-	return crypto_blake2s_update(desc, in, inlen, false);
-}
-
-static int crypto_blake2s_final_x86(struct shash_desc *desc, u8 *out)
-{
-	return crypto_blake2s_final(desc, out, false);
-}
-
-#define BLAKE2S_ALG(name, driver_name, digest_size)			\
-	{								\
-		.base.cra_name		= name,				\
-		.base.cra_driver_name	= driver_name,			\
-		.base.cra_priority	= 200,				\
-		.base.cra_flags		= CRYPTO_ALG_OPTIONAL_KEY,	\
-		.base.cra_blocksize	= BLAKE2S_BLOCK_SIZE,		\
-		.base.cra_ctxsize	= sizeof(struct blake2s_tfm_ctx), \
-		.base.cra_module	= THIS_MODULE,			\
-		.digestsize		= digest_size,			\
-		.setkey			= crypto_blake2s_setkey,	\
-		.init			= crypto_blake2s_init,		\
-		.update			= crypto_blake2s_update_x86,	\
-		.final			= crypto_blake2s_final_x86,	\
-		.descsize		= sizeof(struct blake2s_state),	\
-	}
-
-static struct shash_alg blake2s_algs[] = {
-	BLAKE2S_ALG("blake2s-128", "blake2s-128-x86", BLAKE2S_128_HASH_SIZE),
-	BLAKE2S_ALG("blake2s-160", "blake2s-160-x86", BLAKE2S_160_HASH_SIZE),
-	BLAKE2S_ALG("blake2s-224", "blake2s-224-x86", BLAKE2S_224_HASH_SIZE),
-	BLAKE2S_ALG("blake2s-256", "blake2s-256-x86", BLAKE2S_256_HASH_SIZE),
-};
-
-static int __init blake2s_mod_init(void)
-{
-	if (IS_REACHABLE(CONFIG_CRYPTO_HASH) && boot_cpu_has(X86_FEATURE_SSSE3))
-		return crypto_register_shashes(blake2s_algs, ARRAY_SIZE(blake2s_algs));
-	return 0;
-}
-
-static void __exit blake2s_mod_exit(void)
-{
-	if (IS_REACHABLE(CONFIG_CRYPTO_HASH) && boot_cpu_has(X86_FEATURE_SSSE3))
-		crypto_unregister_shashes(blake2s_algs, ARRAY_SIZE(blake2s_algs));
-}
-
-module_init(blake2s_mod_init);
-module_exit(blake2s_mod_exit);
-
-MODULE_ALIAS_CRYPTO("blake2s-128");
-MODULE_ALIAS_CRYPTO("blake2s-128-x86");
-MODULE_ALIAS_CRYPTO("blake2s-160");
-MODULE_ALIAS_CRYPTO("blake2s-160-x86");
-MODULE_ALIAS_CRYPTO("blake2s-224");
-MODULE_ALIAS_CRYPTO("blake2s-224-x86");
-MODULE_ALIAS_CRYPTO("blake2s-256");
-MODULE_ALIAS_CRYPTO("blake2s-256-x86");
-MODULE_LICENSE("GPL v2");
diff --git a/arch/x86/crypto/polyval-clmulni_asm.S b/arch/x86/crypto/polyval-clmulni_asm.S
new file mode 100644
index 0000000000000..a6ebe4e7dd2b7
--- /dev/null
+++ b/arch/x86/crypto/polyval-clmulni_asm.S
@@ -0,0 +1,321 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright 2021 Google LLC
+ */
+/*
+ * This is an efficient implementation of POLYVAL using Intel PCLMULQDQ-NI
+ * instructions. It works on 8 blocks at a time, by precomputing the first 8
+ * key powers h^8, ..., h^1 in the POLYVAL finite field. This precomputation
+ * allows us to split finite field multiplication into two steps.
+ *
+ * In the first step, we consider h^i, m_i as normal polynomials of degree less
+ * than 128. We then compute p(x) = h^8m_0 + ... + h^1m_7 where multiplication
+ * is simply polynomial multiplication.
+ *
+ * In the second step, we compute the reduction of p(x) modulo the finite field
+ * modulus g(x) = x^128 + x^127 + x^126 + x^121 + 1.
+ *
+ * This two step process is equivalent to computing h^8m_0 + ... + h^1m_7 where
+ * multiplication is finite field multiplication. The advantage is that the
+ * two-step process only requires 1 finite field reduction for every 8
+ * polynomial multiplications. Further parallelism is gained by interleaving the
+ * multiplications and polynomial reductions.
+ */
+
+#include <linux/linkage.h>
+#include <asm/frame.h>
+
+#define STRIDE_BLOCKS 8
+
+#define GSTAR %xmm7
+#define PL %xmm8
+#define PH %xmm9
+#define TMP_XMM %xmm11
+#define LO %xmm12
+#define HI %xmm13
+#define MI %xmm14
+#define SUM %xmm15
+
+#define KEY_POWERS %rdi
+#define MSG %rsi
+#define BLOCKS_LEFT %rdx
+#define ACCUMULATOR %rcx
+#define TMP %rax
+
+.section    .rodata.cst16.gstar, "aM", @progbits, 16
+.align 16
+
+.Lgstar:
+	.quad 0xc200000000000000, 0xc200000000000000
+
+.text
+
+/*
+ * Performs schoolbook1_iteration on two lists of 128-bit polynomials of length
+ * count pointed to by MSG and KEY_POWERS.
+ */
+.macro schoolbook1 count
+	.set i, 0
+	.rept (\count)
+		schoolbook1_iteration i 0
+		.set i, (i +1)
+	.endr
+.endm
+
+/*
+ * Computes the product of two 128-bit polynomials at the memory locations
+ * specified by (MSG + 16*i) and (KEY_POWERS + 16*i) and XORs the components of
+ * the 256-bit product into LO, MI, HI.
+ *
+ * Given:
+ *   X = [X_1 : X_0]
+ *   Y = [Y_1 : Y_0]
+ *
+ * We compute:
+ *   LO += X_0 * Y_0
+ *   MI += X_0 * Y_1 + X_1 * Y_0
+ *   HI += X_1 * Y_1
+ *
+ * Later, the 256-bit result can be extracted as:
+ *   [HI_1 : HI_0 + MI_1 : LO_1 + MI_0 : LO_0]
+ * This step is done when computing the polynomial reduction for efficiency
+ * reasons.
+ *
+ * If xor_sum == 1, then also XOR the value of SUM into m_0.  This avoids an
+ * extra multiplication of SUM and h^8.
+ */
+.macro schoolbook1_iteration i xor_sum
+	movups (16*\i)(MSG), %xmm0
+	.if (\i == 0 && \xor_sum == 1)
+		pxor SUM, %xmm0
+	.endif
+	vpclmulqdq $0x01, (16*\i)(KEY_POWERS), %xmm0, %xmm2
+	vpclmulqdq $0x00, (16*\i)(KEY_POWERS), %xmm0, %xmm1
+	vpclmulqdq $0x10, (16*\i)(KEY_POWERS), %xmm0, %xmm3
+	vpclmulqdq $0x11, (16*\i)(KEY_POWERS), %xmm0, %xmm4
+	vpxor %xmm2, MI, MI
+	vpxor %xmm1, LO, LO
+	vpxor %xmm4, HI, HI
+	vpxor %xmm3, MI, MI
+.endm
+
+/*
+ * Performs the same computation as schoolbook1_iteration, except we expect the
+ * arguments to already be loaded into xmm0 and xmm1 and we set the result
+ * registers LO, MI, and HI directly rather than XOR'ing into them.
+ */
+.macro schoolbook1_noload
+	vpclmulqdq $0x01, %xmm0, %xmm1, MI
+	vpclmulqdq $0x10, %xmm0, %xmm1, %xmm2
+	vpclmulqdq $0x00, %xmm0, %xmm1, LO
+	vpclmulqdq $0x11, %xmm0, %xmm1, HI
+	vpxor %xmm2, MI, MI
+.endm
+
+/*
+ * Computes the 256-bit polynomial represented by LO, HI, MI. Stores
+ * the result in PL, PH.
+ *   [PH : PL] = [HI_1 : HI_0 + MI_1 : LO_1 + MI_0 : LO_0]
+ */
+.macro schoolbook2
+	vpslldq $8, MI, PL
+	vpsrldq $8, MI, PH
+	pxor LO, PL
+	pxor HI, PH
+.endm
+
+/*
+ * Computes the 128-bit reduction of PH : PL. Stores the result in dest.
+ *
+ * This macro computes p(x) mod g(x) where p(x) is in montgomery form and g(x) =
+ * x^128 + x^127 + x^126 + x^121 + 1.
+ *
+ * We have a 256-bit polynomial PH : PL = P_3 : P_2 : P_1 : P_0 that is the
+ * product of two 128-bit polynomials in Montgomery form.  We need to reduce it
+ * mod g(x).  Also, since polynomials in Montgomery form have an "extra" factor
+ * of x^128, this product has two extra factors of x^128.  To get it back into
+ * Montgomery form, we need to remove one of these factors by dividing by x^128.
+ *
+ * To accomplish both of these goals, we add multiples of g(x) that cancel out
+ * the low 128 bits P_1 : P_0, leaving just the high 128 bits. Since the low
+ * bits are zero, the polynomial division by x^128 can be done by right shifting.
+ *
+ * Since the only nonzero term in the low 64 bits of g(x) is the constant term,
+ * the multiple of g(x) needed to cancel out P_0 is P_0 * g(x).  The CPU can
+ * only do 64x64 bit multiplications, so split P_0 * g(x) into x^128 * P_0 +
+ * x^64 * g*(x) * P_0 + P_0, where g*(x) is bits 64-127 of g(x).  Adding this to
+ * the original polynomial gives P_3 : P_2 + P_0 + T_1 : P_1 + T_0 : 0, where T
+ * = T_1 : T_0 = g*(x) * P_0.  Thus, bits 0-63 got "folded" into bits 64-191.
+ *
+ * Repeating this same process on the next 64 bits "folds" bits 64-127 into bits
+ * 128-255, giving the answer in bits 128-255. This time, we need to cancel P_1
+ * + T_0 in bits 64-127. The multiple of g(x) required is (P_1 + T_0) * g(x) *
+ * x^64. Adding this to our previous computation gives P_3 + P_1 + T_0 + V_1 :
+ * P_2 + P_0 + T_1 + V_0 : 0 : 0, where V = V_1 : V_0 = g*(x) * (P_1 + T_0).
+ *
+ * So our final computation is:
+ *   T = T_1 : T_0 = g*(x) * P_0
+ *   V = V_1 : V_0 = g*(x) * (P_1 + T_0)
+ *   p(x) / x^{128} mod g(x) = P_3 + P_1 + T_0 + V_1 : P_2 + P_0 + T_1 + V_0
+ *
+ * The implementation below saves a XOR instruction by computing P_1 + T_0 : P_0
+ * + T_1 and XORing into dest, rather than separately XORing P_1 : P_0 and T_0 :
+ * T_1 into dest.  This allows us to reuse P_1 + T_0 when computing V.
+ */
+.macro montgomery_reduction dest
+	vpclmulqdq $0x00, PL, GSTAR, TMP_XMM	# TMP_XMM = T_1 : T_0 = P_0 * g*(x)
+	pshufd $0b01001110, TMP_XMM, TMP_XMM	# TMP_XMM = T_0 : T_1
+	pxor PL, TMP_XMM			# TMP_XMM = P_1 + T_0 : P_0 + T_1
+	pxor TMP_XMM, PH			# PH = P_3 + P_1 + T_0 : P_2 + P_0 + T_1
+	pclmulqdq $0x11, GSTAR, TMP_XMM		# TMP_XMM = V_1 : V_0 = V = [(P_1 + T_0) * g*(x)]
+	vpxor TMP_XMM, PH, \dest
+.endm
+
+/*
+ * Compute schoolbook multiplication for 8 blocks
+ * m_0h^8 + ... + m_7h^1
+ *
+ * If reduce is set, also computes the montgomery reduction of the
+ * previous full_stride call and XORs with the first message block.
+ * (m_0 + REDUCE(PL, PH))h^8 + ... + m_7h^1.
+ * I.e., the first multiplication uses m_0 + REDUCE(PL, PH) instead of m_0.
+ */
+.macro full_stride reduce
+	pxor LO, LO
+	pxor HI, HI
+	pxor MI, MI
+
+	schoolbook1_iteration 7 0
+	.if \reduce
+		vpclmulqdq $0x00, PL, GSTAR, TMP_XMM
+	.endif
+
+	schoolbook1_iteration 6 0
+	.if \reduce
+		pshufd $0b01001110, TMP_XMM, TMP_XMM
+	.endif
+
+	schoolbook1_iteration 5 0
+	.if \reduce
+		pxor PL, TMP_XMM
+	.endif
+
+	schoolbook1_iteration 4 0
+	.if \reduce
+		pxor TMP_XMM, PH
+	.endif
+
+	schoolbook1_iteration 3 0
+	.if \reduce
+		pclmulqdq $0x11, GSTAR, TMP_XMM
+	.endif
+
+	schoolbook1_iteration 2 0
+	.if \reduce
+		vpxor TMP_XMM, PH, SUM
+	.endif
+
+	schoolbook1_iteration 1 0
+
+	schoolbook1_iteration 0 1
+
+	addq $(8*16), MSG
+	schoolbook2
+.endm
+
+/*
+ * Process BLOCKS_LEFT blocks, where 0 < BLOCKS_LEFT < STRIDE_BLOCKS
+ */
+.macro partial_stride
+	mov BLOCKS_LEFT, TMP
+	shlq $4, TMP				# TMP = 16 * BLOCKS_LEFT
+	addq $(16*STRIDE_BLOCKS), KEY_POWERS
+	subq TMP, KEY_POWERS			# KEY_POWERS -> h^BLOCKS_LEFT
+
+	movups (MSG), %xmm0
+	pxor SUM, %xmm0				# fold accumulator into first block
+	movups (KEY_POWERS), %xmm1		# unaligned load: ctx is not guaranteed 16-byte aligned
+	schoolbook1_noload
+	dec BLOCKS_LEFT
+	addq $16, MSG
+	addq $16, KEY_POWERS
+
+	test $4, BLOCKS_LEFT
+	jz .Lpartial4BlocksDone
+	schoolbook1 4
+	addq $(4*16), MSG
+	addq $(4*16), KEY_POWERS
+.Lpartial4BlocksDone:
+	test $2, BLOCKS_LEFT
+	jz .Lpartial2BlocksDone
+	schoolbook1 2
+	addq $(2*16), MSG
+	addq $(2*16), KEY_POWERS
+.Lpartial2BlocksDone:
+	test $1, BLOCKS_LEFT
+	jz .LpartialDone
+	schoolbook1 1
+.LpartialDone:
+	schoolbook2
+	montgomery_reduction SUM
+.endm
+
+/*
+ * Perform montgomery multiplication in GF(2^128) and store result in op1.
+ *
+ * Computes op1*op2*x^{-128} mod x^128 + x^127 + x^126 + x^121 + 1
+ * If op1, op2 are in montgomery form, this computes the montgomery
+ * form of op1*op2.
+ *
+ * void clmul_polyval_mul(u8 *op1, const u8 *op2);
+ */
+SYM_FUNC_START(clmul_polyval_mul)
+	FRAME_BEGIN
+	vmovdqa .Lgstar(%rip), GSTAR
+	movups (%rdi), %xmm0
+	movups (%rsi), %xmm1
+	schoolbook1_noload
+	schoolbook2
+	montgomery_reduction SUM
+	movups SUM, (%rdi)
+	FRAME_END
+	RET
+SYM_FUNC_END(clmul_polyval_mul)
+
+/*
+ * Perform polynomial evaluation as specified by POLYVAL.  This computes:
+ *	h^n * accumulator + h^n * m_0 + ... + h^1 * m_{n-1}
+ * where n=nblocks, h is the hash key, and m_i are the message blocks.
+ *
+ * rdi - pointer to precomputed key powers h^8 ... h^1
+ * rsi - pointer to message blocks
+ * rdx - number of blocks to hash
+ * rcx - pointer to the accumulator
+ *
+ * void clmul_polyval_update(const struct polyval_tfm_ctx *keys,
+ *	const u8 *in, size_t nblocks, u8 *accumulator);
+ */
+SYM_FUNC_START(clmul_polyval_update)
+	FRAME_BEGIN
+	vmovdqa .Lgstar(%rip), GSTAR
+	movups (ACCUMULATOR), SUM
+	subq $STRIDE_BLOCKS, BLOCKS_LEFT
+	js .LstrideLoopExit
+	full_stride 0
+	subq $STRIDE_BLOCKS, BLOCKS_LEFT
+	js .LstrideLoopExitReduce
+.LstrideLoop:
+	full_stride 1
+	subq $STRIDE_BLOCKS, BLOCKS_LEFT
+	jns .LstrideLoop
+.LstrideLoopExitReduce:
+	montgomery_reduction SUM
+.LstrideLoopExit:
+	add $STRIDE_BLOCKS, BLOCKS_LEFT
+	jz .LskipPartial
+	partial_stride
+.LskipPartial:
+	movups SUM, (ACCUMULATOR)
+	FRAME_END
+	RET
+SYM_FUNC_END(clmul_polyval_update)
diff --git a/arch/x86/crypto/polyval-clmulni_glue.c b/arch/x86/crypto/polyval-clmulni_glue.c
new file mode 100644
index 0000000000000..b7664d0188510
--- /dev/null
+++ b/arch/x86/crypto/polyval-clmulni_glue.c
@@ -0,0 +1,203 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Glue code for POLYVAL using PCLMULQDQ-NI
+ *
+ * Copyright (c) 2007 Nokia Siemens Networks - Mikko Herranen <mh1@iki.fi>
+ * Copyright (c) 2009 Intel Corp.
+ *   Author: Huang Ying <ying.huang@intel.com>
+ * Copyright 2021 Google LLC
+ */
+
+/*
+ * Glue code based on ghash-clmulni-intel_glue.c.
+ *
+ * This implementation of POLYVAL uses montgomery multiplication
+ * accelerated by PCLMULQDQ-NI to implement the finite field
+ * operations.
+ */
+
+#include <crypto/algapi.h>
+#include <crypto/internal/hash.h>
+#include <crypto/internal/simd.h>
+#include <crypto/polyval.h>
+#include <linux/crypto.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <asm/cpu_device_id.h>
+#include <asm/simd.h>
+
+#define NUM_KEY_POWERS	8
+
+struct polyval_tfm_ctx {
+	/*
+	 * These powers must be in the order h^8, ..., h^1.
+	 */
+	u8 key_powers[NUM_KEY_POWERS][POLYVAL_BLOCK_SIZE];
+};
+
+struct polyval_desc_ctx {
+	u8 buffer[POLYVAL_BLOCK_SIZE];
+	u32 bytes;
+};
+
+asmlinkage void clmul_polyval_update(const struct polyval_tfm_ctx *keys,
+	const u8 *in, size_t nblocks, u8 *accumulator);
+asmlinkage void clmul_polyval_mul(u8 *op1, const u8 *op2);
+
+static void internal_polyval_update(const struct polyval_tfm_ctx *keys,
+	const u8 *in, size_t nblocks, u8 *accumulator)
+{
+	if (likely(crypto_simd_usable())) {
+		kernel_fpu_begin();
+		clmul_polyval_update(keys, in, nblocks, accumulator);
+		kernel_fpu_end();
+	} else {
+		polyval_update_non4k(keys->key_powers[NUM_KEY_POWERS-1], in,
+			nblocks, accumulator);
+	}
+}
+
+static void internal_polyval_mul(u8 *op1, const u8 *op2)
+{
+	if (likely(crypto_simd_usable())) {
+		kernel_fpu_begin();
+		clmul_polyval_mul(op1, op2);
+		kernel_fpu_end();
+	} else {
+		polyval_mul_non4k(op1, op2);
+	}
+}
+
+static int polyval_x86_setkey(struct crypto_shash *tfm,
+			const u8 *key, unsigned int keylen)
+{
+	struct polyval_tfm_ctx *tctx = crypto_shash_ctx(tfm);
+	int i;
+
+	if (keylen != POLYVAL_BLOCK_SIZE)
+		return -EINVAL;
+
+	memcpy(tctx->key_powers[NUM_KEY_POWERS-1], key, POLYVAL_BLOCK_SIZE);
+
+	for (i = NUM_KEY_POWERS-2; i >= 0; i--) {
+		memcpy(tctx->key_powers[i], key, POLYVAL_BLOCK_SIZE);
+		internal_polyval_mul(tctx->key_powers[i],
+				     tctx->key_powers[i+1]);
+	}
+
+	return 0;
+}
+
+static int polyval_x86_init(struct shash_desc *desc)
+{
+	struct polyval_desc_ctx *dctx = shash_desc_ctx(desc);
+
+	memset(dctx, 0, sizeof(*dctx));
+
+	return 0;
+}
+
+static int polyval_x86_update(struct shash_desc *desc,
+			 const u8 *src, unsigned int srclen)
+{
+	struct polyval_desc_ctx *dctx = shash_desc_ctx(desc);
+	const struct polyval_tfm_ctx *tctx = crypto_shash_ctx(desc->tfm);
+	u8 *pos;
+	unsigned int nblocks;
+	unsigned int n;
+
+	if (dctx->bytes) {
+		n = min(srclen, dctx->bytes);
+		pos = dctx->buffer + POLYVAL_BLOCK_SIZE - dctx->bytes;
+
+		dctx->bytes -= n;
+		srclen -= n;
+
+		while (n--)
+			*pos++ ^= *src++;
+
+		if (!dctx->bytes)
+			internal_polyval_mul(dctx->buffer,
+					    tctx->key_powers[NUM_KEY_POWERS-1]);
+	}
+
+	while (srclen >= POLYVAL_BLOCK_SIZE) {
+		/* Allow rescheduling every 4K bytes. */
+		nblocks = min(srclen, 4096U) / POLYVAL_BLOCK_SIZE;
+		internal_polyval_update(tctx, src, nblocks, dctx->buffer);
+		srclen -= nblocks * POLYVAL_BLOCK_SIZE;
+		src += nblocks * POLYVAL_BLOCK_SIZE;
+	}
+
+	if (srclen) {
+		dctx->bytes = POLYVAL_BLOCK_SIZE - srclen;
+		pos = dctx->buffer;
+		while (srclen--)
+			*pos++ ^= *src++;
+	}
+
+	return 0;
+}
+
+static int polyval_x86_final(struct shash_desc *desc, u8 *dst)
+{
+	struct polyval_desc_ctx *dctx = shash_desc_ctx(desc);
+	const struct polyval_tfm_ctx *tctx = crypto_shash_ctx(desc->tfm);
+
+	if (dctx->bytes) {
+		internal_polyval_mul(dctx->buffer,
+				     tctx->key_powers[NUM_KEY_POWERS-1]);
+	}
+
+	memcpy(dst, dctx->buffer, POLYVAL_BLOCK_SIZE);
+
+	return 0;
+}
+
+static struct shash_alg polyval_alg = {
+	.digestsize	= POLYVAL_DIGEST_SIZE,
+	.init		= polyval_x86_init,
+	.update		= polyval_x86_update,
+	.final		= polyval_x86_final,
+	.setkey		= polyval_x86_setkey,
+	.descsize	= sizeof(struct polyval_desc_ctx),
+	.base		= {
+		.cra_name		= "polyval",
+		.cra_driver_name	= "polyval-clmulni",
+		.cra_priority		= 200,
+		.cra_blocksize		= POLYVAL_BLOCK_SIZE,
+		.cra_ctxsize		= sizeof(struct polyval_tfm_ctx),
+		.cra_module		= THIS_MODULE,
+	},
+};
+
+__maybe_unused static const struct x86_cpu_id pcmul_cpu_id[] = {
+	X86_MATCH_FEATURE(X86_FEATURE_PCLMULQDQ, NULL),
+	{}
+};
+MODULE_DEVICE_TABLE(x86cpu, pcmul_cpu_id);
+
+static int __init polyval_clmulni_mod_init(void)
+{
+	if (!x86_match_cpu(pcmul_cpu_id))
+		return -ENODEV;
+
+	if (!boot_cpu_has(X86_FEATURE_AVX))
+		return -ENODEV;
+
+	return crypto_register_shash(&polyval_alg);
+}
+
+static void __exit polyval_clmulni_mod_exit(void)
+{
+	crypto_unregister_shash(&polyval_alg);
+}
+
+module_init(polyval_clmulni_mod_init);
+module_exit(polyval_clmulni_mod_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("POLYVAL hash function accelerated by PCLMULQDQ-NI");
+MODULE_ALIAS_CRYPTO("polyval");
+MODULE_ALIAS_CRYPTO("polyval-clmulni");
diff --git a/crypto/Kconfig b/crypto/Kconfig
index 1d44893a997ba..59489a300cd10 100644
--- a/crypto/Kconfig
+++ b/crypto/Kconfig
@@ -461,6 +461,15 @@ config CRYPTO_PCBC
 	  PCBC: Propagating Cipher Block Chaining mode
 	  This block cipher algorithm is required for RxRPC.
 
+config CRYPTO_XCTR
+	tristate
+	select CRYPTO_SKCIPHER
+	select CRYPTO_MANAGER
+	help
+	  XCTR: XOR Counter mode. This blockcipher mode is a variant of CTR mode
+	  using XORs and little-endian addition rather than big-endian arithmetic.
+	  XCTR mode is used to implement HCTR2.
+
 config CRYPTO_XTS
 	tristate "XTS support"
 	select CRYPTO_SKCIPHER
@@ -524,6 +533,17 @@ config CRYPTO_ADIANTUM
 
 	  If unsure, say N.
 
+config CRYPTO_HCTR2
+	tristate "HCTR2 support"
+	select CRYPTO_XCTR
+	select CRYPTO_POLYVAL
+	select CRYPTO_MANAGER
+	help
+	  HCTR2 is a length-preserving encryption mode for storage encryption that
+	  is efficient on processors with instructions to accelerate AES and
+	  carryless multiplication, e.g. x86 processors with AES-NI and CLMUL, and
+	  ARM processors with the ARMv8 crypto extensions.
+
 config CRYPTO_ESSIV
 	tristate "ESSIV support for block encryption"
 	select CRYPTO_AUTHENC
@@ -692,26 +712,8 @@ config CRYPTO_BLAKE2B
 
 	  See https://blake2.net for further information.
 
-config CRYPTO_BLAKE2S
-	tristate "BLAKE2s digest algorithm"
-	select CRYPTO_LIB_BLAKE2S_GENERIC
-	select CRYPTO_HASH
-	help
-	  Implementation of cryptographic hash function BLAKE2s
-	  optimized for 8-32bit platforms and can produce digests of any size
-	  between 1 to 32.  The keyed hash is also implemented.
-
-	  This module provides the following algorithms:
-
-	  - blake2s-128
-	  - blake2s-160
-	  - blake2s-224
-	  - blake2s-256
-
-	  See https://blake2.net for further information.
-
 config CRYPTO_BLAKE2S_X86
-	tristate "BLAKE2s digest algorithm (x86 accelerated version)"
+	bool "BLAKE2s digest algorithm (x86 accelerated version)"
 	depends on X86 && 64BIT
 	select CRYPTO_LIB_BLAKE2S_GENERIC
 	select CRYPTO_ARCH_HAVE_LIB_BLAKE2S
@@ -765,6 +767,23 @@ config CRYPTO_GHASH
 	  GHASH is the hash function used in GCM (Galois/Counter Mode).
 	  It is not a general-purpose cryptographic hash function.
 
+config CRYPTO_POLYVAL
+	tristate
+	select CRYPTO_GF128MUL
+	select CRYPTO_HASH
+	help
+	  POLYVAL is the hash function used in HCTR2.  It is not a general-purpose
+	  cryptographic hash function.
+
+config CRYPTO_POLYVAL_CLMUL_NI
+	tristate "POLYVAL hash function (CLMUL-NI accelerated)"
+	depends on X86 && 64BIT
+	select CRYPTO_POLYVAL
+	help
+	  This is the x86_64 CLMUL-NI accelerated implementation of POLYVAL. It is
+	  used to efficiently implement HCTR2 on x86-64 processors that support
+	  carry-less multiplication instructions.
+
 config CRYPTO_POLY1305
 	tristate "Poly1305 authenticator algorithm"
 	select CRYPTO_HASH
@@ -1142,7 +1161,7 @@ config CRYPTO_AES_NI_INTEL
 	  In addition to AES cipher algorithm support, the acceleration
 	  for some popular block cipher mode is supported too, including
 	  ECB, CBC, LRW, XTS. The 64 bit version has additional
-	  acceleration for CTR.
+	  acceleration for CTR and XCTR.
 
 config CRYPTO_AES_SPARC64
 	tristate "AES cipher algorithms (SPARC64)"
diff --git a/crypto/Makefile b/crypto/Makefile
index ceaaa9f34145a..a4a84860fe43d 100644
--- a/crypto/Makefile
+++ b/crypto/Makefile
@@ -84,7 +84,6 @@ obj-$(CONFIG_CRYPTO_STREEBOG) += streebog_generic.o
 obj-$(CONFIG_CRYPTO_WP512) += wp512.o
 CFLAGS_wp512.o := $(call cc-option,-fno-schedule-insns)  # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=79149
 obj-$(CONFIG_CRYPTO_BLAKE2B) += blake2b_generic.o
-obj-$(CONFIG_CRYPTO_BLAKE2S) += blake2s_generic.o
 obj-$(CONFIG_CRYPTO_GF128MUL) += gf128mul.o
 obj-$(CONFIG_CRYPTO_ECB) += ecb.o
 obj-$(CONFIG_CRYPTO_CBC) += cbc.o
@@ -94,6 +93,8 @@ obj-$(CONFIG_CRYPTO_CTS) += cts.o
 obj-$(CONFIG_CRYPTO_LRW) += lrw.o
 obj-$(CONFIG_CRYPTO_XTS) += xts.o
 obj-$(CONFIG_CRYPTO_CTR) += ctr.o
+obj-$(CONFIG_CRYPTO_XCTR) += xctr.o
+obj-$(CONFIG_CRYPTO_HCTR2) += hctr2.o
 obj-$(CONFIG_CRYPTO_KEYWRAP) += keywrap.o
 obj-$(CONFIG_CRYPTO_ADIANTUM) += adiantum.o
 obj-$(CONFIG_CRYPTO_NHPOLY1305) += nhpoly1305.o
@@ -171,6 +172,7 @@ UBSAN_SANITIZE_jitterentropy.o = n
 jitterentropy_rng-y := jitterentropy.o jitterentropy-kcapi.o
 obj-$(CONFIG_CRYPTO_TEST) += tcrypt.o
 obj-$(CONFIG_CRYPTO_GHASH) += ghash-generic.o
+obj-$(CONFIG_CRYPTO_POLYVAL) += polyval-generic.o
 obj-$(CONFIG_CRYPTO_USER_API) += af_alg.o
 obj-$(CONFIG_CRYPTO_USER_API_HASH) += algif_hash.o
 obj-$(CONFIG_CRYPTO_USER_API_SKCIPHER) += algif_skcipher.o
diff --git a/crypto/blake2s_generic.c b/crypto/blake2s_generic.c
deleted file mode 100644
index 5f96a21f87883..0000000000000
--- a/crypto/blake2s_generic.c
+++ /dev/null
@@ -1,75 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0 OR MIT
-/*
- * shash interface to the generic implementation of BLAKE2s
- *
- * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
- */
-
-#include <crypto/internal/blake2s.h>
-#include <crypto/internal/hash.h>
-
-#include <linux/types.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-
-static int crypto_blake2s_update_generic(struct shash_desc *desc,
-					 const u8 *in, unsigned int inlen)
-{
-	return crypto_blake2s_update(desc, in, inlen, true);
-}
-
-static int crypto_blake2s_final_generic(struct shash_desc *desc, u8 *out)
-{
-	return crypto_blake2s_final(desc, out, true);
-}
-
-#define BLAKE2S_ALG(name, driver_name, digest_size)			\
-	{								\
-		.base.cra_name		= name,				\
-		.base.cra_driver_name	= driver_name,			\
-		.base.cra_priority	= 100,				\
-		.base.cra_flags		= CRYPTO_ALG_OPTIONAL_KEY,	\
-		.base.cra_blocksize	= BLAKE2S_BLOCK_SIZE,		\
-		.base.cra_ctxsize	= sizeof(struct blake2s_tfm_ctx), \
-		.base.cra_module	= THIS_MODULE,			\
-		.digestsize		= digest_size,			\
-		.setkey			= crypto_blake2s_setkey,	\
-		.init			= crypto_blake2s_init,		\
-		.update			= crypto_blake2s_update_generic, \
-		.final			= crypto_blake2s_final_generic,	\
-		.descsize		= sizeof(struct blake2s_state),	\
-	}
-
-static struct shash_alg blake2s_algs[] = {
-	BLAKE2S_ALG("blake2s-128", "blake2s-128-generic",
-		    BLAKE2S_128_HASH_SIZE),
-	BLAKE2S_ALG("blake2s-160", "blake2s-160-generic",
-		    BLAKE2S_160_HASH_SIZE),
-	BLAKE2S_ALG("blake2s-224", "blake2s-224-generic",
-		    BLAKE2S_224_HASH_SIZE),
-	BLAKE2S_ALG("blake2s-256", "blake2s-256-generic",
-		    BLAKE2S_256_HASH_SIZE),
-};
-
-static int __init blake2s_mod_init(void)
-{
-	return crypto_register_shashes(blake2s_algs, ARRAY_SIZE(blake2s_algs));
-}
-
-static void __exit blake2s_mod_exit(void)
-{
-	crypto_unregister_shashes(blake2s_algs, ARRAY_SIZE(blake2s_algs));
-}
-
-subsys_initcall(blake2s_mod_init);
-module_exit(blake2s_mod_exit);
-
-MODULE_ALIAS_CRYPTO("blake2s-128");
-MODULE_ALIAS_CRYPTO("blake2s-128-generic");
-MODULE_ALIAS_CRYPTO("blake2s-160");
-MODULE_ALIAS_CRYPTO("blake2s-160-generic");
-MODULE_ALIAS_CRYPTO("blake2s-224");
-MODULE_ALIAS_CRYPTO("blake2s-224-generic");
-MODULE_ALIAS_CRYPTO("blake2s-256");
-MODULE_ALIAS_CRYPTO("blake2s-256-generic");
-MODULE_LICENSE("GPL v2");
diff --git a/crypto/hctr2.c b/crypto/hctr2.c
new file mode 100644
index 0000000000000..7d00a3bcb6670
--- /dev/null
+++ b/crypto/hctr2.c
@@ -0,0 +1,581 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * HCTR2 length-preserving encryption mode
+ *
+ * Copyright 2021 Google LLC
+ */
+
+
+/*
+ * HCTR2 is a length-preserving encryption mode that is efficient on
+ * processors with instructions to accelerate AES and carryless
+ * multiplication, e.g. x86 processors with AES-NI and CLMUL, and ARM
+ * processors with the ARMv8 crypto extensions.
+ *
+ * For more details, see the paper: "Length-preserving encryption with HCTR2"
+ * (https://eprint.iacr.org/2021/1441.pdf)
+ */
+
+#include <crypto/internal/cipher.h>
+#include <crypto/internal/hash.h>
+#include <crypto/internal/skcipher.h>
+#include <crypto/polyval.h>
+#include <crypto/scatterwalk.h>
+#include <linux/module.h>
+
+#define BLOCKCIPHER_BLOCK_SIZE		16
+
+/*
+ * The specification allows variable-length tweaks, but Linux's crypto API
+ * currently only allows algorithms to support a single length.  The "natural"
+ * tweak length for HCTR2 is 16, since that fits into one POLYVAL block for
+ * the best performance.  But longer tweaks are useful for fscrypt, to avoid
+ * needing to derive per-file keys.  So instead we use two blocks, or 32 bytes.
+ */
+#define TWEAK_SIZE		32
+
+struct hctr2_instance_ctx {
+	struct crypto_cipher_spawn blockcipher_spawn;
+	struct crypto_skcipher_spawn xctr_spawn;
+	struct crypto_shash_spawn polyval_spawn;
+};
+
+struct hctr2_tfm_ctx {
+	struct crypto_cipher *blockcipher;
+	struct crypto_skcipher *xctr;
+	struct crypto_shash *polyval;
+	u8 L[BLOCKCIPHER_BLOCK_SIZE];
+	int hashed_tweak_offset;
+	/*
+	 * This struct is allocated with extra space for two exported hash
+	 * states.  Since the hash state size is not known at compile-time, we
+	 * can't add these to the struct directly.
+	 *
+	 * hashed_tweaklen_divisible;
+	 * hashed_tweaklen_remainder;
+	 */
+};
+
+struct hctr2_request_ctx {
+	u8 first_block[BLOCKCIPHER_BLOCK_SIZE];
+	u8 xctr_iv[BLOCKCIPHER_BLOCK_SIZE];
+	struct scatterlist *bulk_part_dst;
+	struct scatterlist *bulk_part_src;
+	struct scatterlist sg_src[2];
+	struct scatterlist sg_dst[2];
+	/*
+	 * Sub-request sizes are unknown at compile-time, so they need to go
+	 * after the members with known sizes.
+	 */
+	union {
+		struct shash_desc hash_desc;
+		struct skcipher_request xctr_req;
+	} u;
+	/*
+	 * This struct is allocated with extra space for one exported hash
+	 * state.  Since the hash state size is not known at compile-time, we
+	 * can't add it to the struct directly.
+	 *
+	 * hashed_tweak;
+	 */
+};
+
+static inline u8 *hctr2_hashed_tweaklen(const struct hctr2_tfm_ctx *tctx,
+					bool has_remainder)
+{
+	u8 *p = (u8 *)tctx + sizeof(*tctx);
+
+	if (has_remainder) /* For messages not a multiple of block length */
+		p += crypto_shash_statesize(tctx->polyval);
+	return p;
+}
+
+static inline u8 *hctr2_hashed_tweak(const struct hctr2_tfm_ctx *tctx,
+				     struct hctr2_request_ctx *rctx)
+{
+	return (u8 *)rctx + tctx->hashed_tweak_offset;
+}
+
+/*
+ * The input data for each HCTR2 hash step begins with a 16-byte block that
+ * contains the tweak length and a flag that indicates whether the input is evenly
+ * divisible into blocks.  Since this implementation only supports one tweak
+ * length, we precompute the two hash states resulting from hashing the two
+ * possible values of this initial block.  This reduces by one block the amount of
+ * data that needs to be hashed for each encryption/decryption.
+ *
+ * These precomputed hashes are stored in hctr2_tfm_ctx.
+ */
+static int hctr2_hash_tweaklen(struct hctr2_tfm_ctx *tctx, bool has_remainder)
+{
+	SHASH_DESC_ON_STACK(shash, tctx->polyval);
+	__le64 tweak_length_block[2];
+	int err;
+
+	shash->tfm = tctx->polyval;
+	memset(tweak_length_block, 0, sizeof(tweak_length_block));
+
+	tweak_length_block[0] = cpu_to_le64(TWEAK_SIZE * 8 * 2 + 2 + has_remainder);
+	err = crypto_shash_init(shash);
+	if (err)
+		return err;
+	err = crypto_shash_update(shash, (u8 *)tweak_length_block,
+				  POLYVAL_BLOCK_SIZE);
+	if (err)
+		return err;
+	return crypto_shash_export(shash, hctr2_hashed_tweaklen(tctx, has_remainder));
+}
+
+static int hctr2_setkey(struct crypto_skcipher *tfm, const u8 *key,
+			unsigned int keylen)
+{
+	struct hctr2_tfm_ctx *tctx = crypto_skcipher_ctx(tfm);
+	u8 hbar[BLOCKCIPHER_BLOCK_SIZE];	/* POLYVAL key, derived below */
+	int err;
+
+	crypto_cipher_clear_flags(tctx->blockcipher, CRYPTO_TFM_REQ_MASK);
+	crypto_cipher_set_flags(tctx->blockcipher,
+				crypto_skcipher_get_flags(tfm) &
+				CRYPTO_TFM_REQ_MASK);
+	err = crypto_cipher_setkey(tctx->blockcipher, key, keylen);
+	if (err)
+		return err;
+
+	crypto_skcipher_clear_flags(tctx->xctr, CRYPTO_TFM_REQ_MASK);
+	crypto_skcipher_set_flags(tctx->xctr,
+				  crypto_skcipher_get_flags(tfm) &
+				  CRYPTO_TFM_REQ_MASK);
+	err = crypto_skcipher_setkey(tctx->xctr, key, keylen);
+	if (err)
+		return err;
+
+	memset(hbar, 0, sizeof(hbar));
+	crypto_cipher_encrypt_one(tctx->blockcipher, hbar, hbar);
+
+	memset(tctx->L, 0, sizeof(tctx->L));
+	tctx->L[0] = 0x01;
+	crypto_cipher_encrypt_one(tctx->blockcipher, tctx->L, tctx->L);
+
+	crypto_shash_clear_flags(tctx->polyval, CRYPTO_TFM_REQ_MASK);
+	crypto_shash_set_flags(tctx->polyval, crypto_skcipher_get_flags(tfm) &
+			       CRYPTO_TFM_REQ_MASK);
+	err = crypto_shash_setkey(tctx->polyval, hbar, BLOCKCIPHER_BLOCK_SIZE);
+	memzero_explicit(hbar, sizeof(hbar));	/* wipe even if setkey failed */
+	if (err)
+		return err;
+
+	return hctr2_hash_tweaklen(tctx, true) ?: hctr2_hash_tweaklen(tctx, false);
+}
+
+static int hctr2_hash_tweak(struct skcipher_request *req)
+{
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	const struct hctr2_tfm_ctx *tctx = crypto_skcipher_ctx(tfm);
+	struct hctr2_request_ctx *rctx = skcipher_request_ctx(req);
+	struct shash_desc *hash_desc = &rctx->u.hash_desc;
+	int err;
+	bool has_remainder = req->cryptlen % POLYVAL_BLOCK_SIZE;
+
+	hash_desc->tfm = tctx->polyval;
+	err = crypto_shash_import(hash_desc, hctr2_hashed_tweaklen(tctx, has_remainder));
+	if (err)
+		return err;
+	err = crypto_shash_update(hash_desc, req->iv, TWEAK_SIZE);
+	if (err)
+		return err;
+
+	// Store the hashed tweak, since we need it when computing both
+	// H(T || N) and H(T || V).
+	return crypto_shash_export(hash_desc, hctr2_hashed_tweak(tctx, rctx));
+}
+
+/*
+ * Hash the bulk part of the message (everything after the first block) into
+ * the POLYVAL state already loaded in rctx->u.hash_desc and write the result
+ * to @digest.
+ *
+ * @req:    request; req->cryptlen minus one block gives the bulk length
+ * @sgl:    scatterlist positioned at the start of the bulk data
+ * @digest: receives the final POLYVAL digest
+ *
+ * If the bulk length is not a multiple of the block size, the data is
+ * followed by a 0x01 byte and zero bytes up to the next block boundary.
+ *
+ * Returns 0 on success, -EINVAL if @sgl ends before the bulk length has been
+ * consumed, or the error from the failing shash call.
+ */
+static int hctr2_hash_message(struct skcipher_request *req,
+			      struct scatterlist *sgl,
+			      u8 digest[POLYVAL_DIGEST_SIZE])
+{
+	static const u8 padding[BLOCKCIPHER_BLOCK_SIZE] = { 0x1 };
+	struct hctr2_request_ctx *rctx = skcipher_request_ctx(req);
+	struct shash_desc *hash_desc = &rctx->u.hash_desc;
+	const unsigned int bulk_len = req->cryptlen - BLOCKCIPHER_BLOCK_SIZE;
+	struct sg_mapping_iter miter;
+	unsigned int remainder = bulk_len % BLOCKCIPHER_BLOCK_SIZE;
+	// Same type as bulk_len so the loop arithmetic is not mixed-sign.
+	unsigned int i;
+	unsigned int n = 0;
+	int err = 0;
+
+	sg_miter_start(&miter, sgl, sg_nents(sgl),
+		       SG_MITER_FROM_SG | SG_MITER_ATOMIC);
+	for (i = 0; i < bulk_len; i += n) {
+		// Stop if the scatterlist runs out early; otherwise n would
+		// be 0 and the loop would never advance.
+		if (!sg_miter_next(&miter)) {
+			err = -EINVAL;
+			break;
+		}
+		n = min_t(unsigned int, miter.length, bulk_len - i);
+		err = crypto_shash_update(hash_desc, miter.addr, n);
+		if (err)
+			break;
+	}
+	// Always release the iterator, including on the error paths above.
+	sg_miter_stop(&miter);
+
+	if (err)
+		return err;
+
+	if (remainder) {
+		// padding[] is { 0x01, 0x00, ... }; feed just enough of it to
+		// reach the next block boundary.
+		err = crypto_shash_update(hash_desc, padding,
+					  BLOCKCIPHER_BLOCK_SIZE - remainder);
+		if (err)
+			return err;
+	}
+	return crypto_shash_final(hash_desc, digest);
+}
+
+/*
+ * Second hashing pass, run once the XCTR step has filled rctx->bulk_part_dst.
+ *
+ * Restarts POLYVAL from the stored hashed-tweak state, hashes the bulk output,
+ * XORs the digest into rctx->first_block and writes that block to the first
+ * BLOCKCIPHER_BLOCK_SIZE bytes of req->dst.
+ *
+ * Returns 0 on success or the error from the failing hash step.
+ */
+static int hctr2_finish(struct skcipher_request *req)
+{
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	const struct hctr2_tfm_ctx *tctx = crypto_skcipher_ctx(tfm);
+	struct hctr2_request_ctx *rctx = skcipher_request_ctx(req);
+	u8 digest[POLYVAL_DIGEST_SIZE];
+	struct shash_desc *hash_desc = &rctx->u.hash_desc;
+	int err;
+
+	// U = UU ^ H(T || V)
+	// or M = MM ^ H(T || N)
+	hash_desc->tfm = tctx->polyval;
+	err = crypto_shash_import(hash_desc, hctr2_hashed_tweak(tctx, rctx));
+	if (err)
+		return err;
+	err = hctr2_hash_message(req, rctx->bulk_part_dst, digest);
+	if (err)
+		return err;
+	crypto_xor(rctx->first_block, digest, BLOCKCIPHER_BLOCK_SIZE);
+
+	// Copy U (or M) into dst scatterlist
+	scatterwalk_map_and_copy(rctx->first_block, req->dst,
+				 0, BLOCKCIPHER_BLOCK_SIZE, 1);
+	return 0;
+}
+
+/*
+ * Completion callback installed on the XCTR sub-request by hctr2_crypt().
+ *
+ * @areq: async request whose ->data is the original skcipher request
+ * @err:  completion status of the XCTR step
+ *
+ * On success the final hashing pass is run here; in all cases the original
+ * request is then completed with the resulting status.
+ */
+static void hctr2_xctr_done(struct crypto_async_request *areq,
+				    int err)
+{
+	struct skcipher_request *req = areq->data;
+
+	if (!err)
+		err = hctr2_finish(req);
+
+	skcipher_request_complete(req, err);
+}
+
+/*
+ * Common encrypt/decrypt path.
+ *
+ * @req: skcipher request; req->iv is the tweak and req->cryptlen must be at
+ *       least BLOCKCIPHER_BLOCK_SIZE
+ * @enc: true to encrypt, false to decrypt
+ *
+ * The first block is processed with POLYVAL and a single block cipher call.
+ * The rest of the message is processed by the XCTR skcipher, after which
+ * hctr2_finish() produces the first output block.
+ *
+ * Returns -EINVAL for requests shorter than one block, otherwise the result of
+ * the hashing steps, the XCTR request, or hctr2_finish().
+ */
+static int hctr2_crypt(struct skcipher_request *req, bool enc)
+{
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	const struct hctr2_tfm_ctx *tctx = crypto_skcipher_ctx(tfm);
+	struct hctr2_request_ctx *rctx = skcipher_request_ctx(req);
+	u8 digest[POLYVAL_DIGEST_SIZE];
+	// Computed before the length check below, but not used until after it.
+	int bulk_len = req->cryptlen - BLOCKCIPHER_BLOCK_SIZE;
+	int err;
+
+	// Requests must be at least one block
+	if (req->cryptlen < BLOCKCIPHER_BLOCK_SIZE)
+		return -EINVAL;
+
+	// Copy M (or U) into a temporary buffer
+	scatterwalk_map_and_copy(rctx->first_block, req->src,
+				 0, BLOCKCIPHER_BLOCK_SIZE, 0);
+
+	// Create scatterlists for N and V
+	rctx->bulk_part_src = scatterwalk_ffwd(rctx->sg_src, req->src,
+					       BLOCKCIPHER_BLOCK_SIZE);
+	rctx->bulk_part_dst = scatterwalk_ffwd(rctx->sg_dst, req->dst,
+					       BLOCKCIPHER_BLOCK_SIZE);
+
+	// MM = M ^ H(T || N)
+	// or UU = U ^ H(T || V)
+	err = hctr2_hash_tweak(req);
+	if (err)
+		return err;
+	err = hctr2_hash_message(req, rctx->bulk_part_src, digest);
+	if (err)
+		return err;
+	crypto_xor(digest, rctx->first_block, BLOCKCIPHER_BLOCK_SIZE);
+
+	// UU = E(MM)
+	// or MM = D(UU)
+	if (enc)
+		crypto_cipher_encrypt_one(tctx->blockcipher, rctx->first_block,
+					  digest);
+	else
+		crypto_cipher_decrypt_one(tctx->blockcipher, rctx->first_block,
+					  digest);
+
+	// S = MM ^ UU ^ L
+	crypto_xor(digest, rctx->first_block, BLOCKCIPHER_BLOCK_SIZE);
+	crypto_xor_cpy(rctx->xctr_iv, digest, tctx->L, BLOCKCIPHER_BLOCK_SIZE);
+
+	// V = XCTR(S, N)
+	// or N = XCTR(S, V)
+	// The XCTR transform is always invoked via its encrypt entry point,
+	// for both values of enc.
+	skcipher_request_set_tfm(&rctx->u.xctr_req, tctx->xctr);
+	skcipher_request_set_crypt(&rctx->u.xctr_req, rctx->bulk_part_src,
+				   rctx->bulk_part_dst, bulk_len,
+				   rctx->xctr_iv);
+	skcipher_request_set_callback(&rctx->u.xctr_req,
+				      req->base.flags,
+				      hctr2_xctr_done, req);
+	// A zero return means XCTR finished synchronously, so finish here.
+	// Any other value is passed through; presumably an asynchronous
+	// completion then reaches hctr2_xctr_done() -- confirm against the
+	// skcipher API.
+	return crypto_skcipher_encrypt(&rctx->u.xctr_req) ?:
+		hctr2_finish(req);
+}
+
+/* skcipher .encrypt entry point: run the common path in encrypt mode. */
+static int hctr2_encrypt(struct skcipher_request *req)
+{
+	return hctr2_crypt(req, true);
+}
+
+/* skcipher .decrypt entry point: run the common path in decrypt mode. */
+static int hctr2_decrypt(struct skcipher_request *req)
+{
+	return hctr2_crypt(req, false);
+}
+
+/*
+ * skcipher .init: instantiate the XCTR, block cipher and POLYVAL transforms
+ * from the instance's spawns and size the per-request context.
+ *
+ * Returns 0 on success.  On failure, returns the error from the failing spawn
+ * after freeing the transforms that were already created.
+ */
+static int hctr2_init_tfm(struct crypto_skcipher *tfm)
+{
+	struct skcipher_instance *inst = skcipher_alg_instance(tfm);
+	struct hctr2_instance_ctx *ictx = skcipher_instance_ctx(inst);
+	struct hctr2_tfm_ctx *tctx = crypto_skcipher_ctx(tfm);
+	struct crypto_skcipher *xctr;
+	struct crypto_cipher *blockcipher;
+	struct crypto_shash *polyval;
+	unsigned int subreq_size;
+	int err;
+
+	xctr = crypto_spawn_skcipher(&ictx->xctr_spawn);
+	if (IS_ERR(xctr))
+		return PTR_ERR(xctr);
+
+	blockcipher = crypto_spawn_cipher(&ictx->blockcipher_spawn);
+	if (IS_ERR(blockcipher)) {
+		err = PTR_ERR(blockcipher);
+		goto err_free_xctr;
+	}
+
+	polyval = crypto_spawn_shash(&ictx->polyval_spawn);
+	if (IS_ERR(polyval)) {
+		err = PTR_ERR(polyval);
+		goto err_free_blockcipher;
+	}
+
+	tctx->xctr = xctr;
+	tctx->blockcipher = blockcipher;
+	tctx->polyval = polyval;
+
+	// The union 'u' must be the last member, because the sub-request
+	// contexts and the hashed tweak are laid out after it.
+	BUILD_BUG_ON(offsetofend(struct hctr2_request_ctx, u) !=
+				 sizeof(struct hctr2_request_ctx));
+	// The hash descriptor and the XCTR request share the union, so
+	// reserve room for whichever needs more trailing context.
+	subreq_size = max(sizeof_field(struct hctr2_request_ctx, u.hash_desc) +
+			  crypto_shash_descsize(polyval),
+			  sizeof_field(struct hctr2_request_ctx, u.xctr_req) +
+			  crypto_skcipher_reqsize(xctr));
+
+	// The exported POLYVAL state for the hashed tweak goes after the
+	// larger of the two sub-request areas.
+	tctx->hashed_tweak_offset = offsetof(struct hctr2_request_ctx, u) +
+				    subreq_size;
+	crypto_skcipher_set_reqsize(tfm, tctx->hashed_tweak_offset +
+				    crypto_shash_statesize(polyval));
+	return 0;
+
+err_free_blockcipher:
+	crypto_free_cipher(blockcipher);
+err_free_xctr:
+	crypto_free_skcipher(xctr);
+	return err;
+}
+
+/* skcipher .exit: free the three transforms created by hctr2_init_tfm(). */
+static void hctr2_exit_tfm(struct crypto_skcipher *tfm)
+{
+	struct hctr2_tfm_ctx *tctx = crypto_skcipher_ctx(tfm);
+
+	crypto_free_cipher(tctx->blockcipher);
+	crypto_free_skcipher(tctx->xctr);
+	crypto_free_shash(tctx->polyval);
+}
+
+/*
+ * Instance destructor: drop the three algorithm spawns and free the instance.
+ * Also used as the error-path cleanup in hctr2_create_common().
+ */
+static void hctr2_free_instance(struct skcipher_instance *inst)
+{
+	struct hctr2_instance_ctx *ictx = skcipher_instance_ctx(inst);
+
+	crypto_drop_cipher(&ictx->blockcipher_spawn);
+	crypto_drop_skcipher(&ictx->xctr_spawn);
+	crypto_drop_shash(&ictx->polyval_spawn);
+	kfree(inst);
+}
+
+/*
+ * Build and register an hctr2 skcipher instance.
+ *
+ * @tmpl:         template the instance is registered under
+ * @tb:           template attributes, used to obtain the algorithm type mask
+ * @xctr_name:    name of the XCTR skcipher to grab; its cra_name must have the
+ *                form "xctr(<blockcipher>)"
+ * @polyval_name: name of the hash to grab; its cra_name must be "polyval"
+ *
+ * The block cipher name is extracted from the XCTR algorithm's cra_name, and
+ * the block cipher must have a block size of BLOCKCIPHER_BLOCK_SIZE.
+ *
+ * Returns 0 on success or a negative errno.  On any failure after allocation,
+ * the instance is released through hctr2_free_instance().
+ */
+static int hctr2_create_common(struct crypto_template *tmpl,
+			       struct rtattr **tb,
+			       const char *xctr_name,
+			       const char *polyval_name)
+{
+	u32 mask;
+	struct skcipher_instance *inst;
+	struct hctr2_instance_ctx *ictx;
+	struct skcipher_alg *xctr_alg;
+	struct crypto_alg *blockcipher_alg;
+	struct shash_alg *polyval_alg;
+	char blockcipher_name[CRYPTO_MAX_ALG_NAME];
+	int len;
+	int err;
+
+	err = crypto_check_attr_type(tb, CRYPTO_ALG_TYPE_SKCIPHER, &mask);
+	if (err)
+		return err;
+
+	// The instance context is allocated directly behind the instance.
+	inst = kzalloc(sizeof(*inst) + sizeof(*ictx), GFP_KERNEL);
+	if (!inst)
+		return -ENOMEM;
+	ictx = skcipher_instance_ctx(inst);
+
+	/* Stream cipher, xctr(block_cipher) */
+	err = crypto_grab_skcipher(&ictx->xctr_spawn,
+				   skcipher_crypto_instance(inst),
+				   xctr_name, 0, mask);
+	if (err)
+		goto err_free_inst;
+	xctr_alg = crypto_spawn_skcipher_alg(&ictx->xctr_spawn);
+
+	// Derive the block cipher name by stripping "xctr(" and the trailing
+	// ')' from the XCTR algorithm's cra_name.
+	err = -EINVAL;
+	if (strncmp(xctr_alg->base.cra_name, "xctr(", 5))
+		goto err_free_inst;
+	len = strscpy(blockcipher_name, xctr_alg->base.cra_name + 5,
+		      sizeof(blockcipher_name));
+	// Rejects an empty remainder as well as a negative (error) return.
+	if (len < 1)
+		goto err_free_inst;
+	if (blockcipher_name[len - 1] != ')')
+		goto err_free_inst;
+	blockcipher_name[len - 1] = 0;
+
+	/* Block cipher, e.g. "aes" */
+	err = crypto_grab_cipher(&ictx->blockcipher_spawn,
+				 skcipher_crypto_instance(inst),
+				 blockcipher_name, 0, mask);
+	if (err)
+		goto err_free_inst;
+	blockcipher_alg = crypto_spawn_cipher_alg(&ictx->blockcipher_spawn);
+
+	/* Require blocksize of 16 bytes */
+	err = -EINVAL;
+	if (blockcipher_alg->cra_blocksize != BLOCKCIPHER_BLOCK_SIZE)
+		goto err_free_inst;
+
+	/* Polyval ε-∆U hash function */
+	err = crypto_grab_shash(&ictx->polyval_spawn,
+				skcipher_crypto_instance(inst),
+				polyval_name, 0, mask);
+	if (err)
+		goto err_free_inst;
+	polyval_alg = crypto_spawn_shash_alg(&ictx->polyval_spawn);
+
+	/* Ensure Polyval is being used */
+	err = -EINVAL;
+	if (strcmp(polyval_alg->base.cra_name, "polyval") != 0)
+		goto err_free_inst;
+
+	/* Instance fields */
+
+	// snprintf() returns the untruncated length, so a result of
+	// CRYPTO_MAX_ALG_NAME or more means the name did not fit.
+	err = -ENAMETOOLONG;
+	if (snprintf(inst->alg.base.cra_name, CRYPTO_MAX_ALG_NAME, "hctr2(%s)",
+		     blockcipher_alg->cra_name) >= CRYPTO_MAX_ALG_NAME)
+		goto err_free_inst;
+	if (snprintf(inst->alg.base.cra_driver_name, CRYPTO_MAX_ALG_NAME,
+		     "hctr2_base(%s,%s)",
+		     xctr_alg->base.cra_driver_name,
+		     polyval_alg->base.cra_driver_name) >= CRYPTO_MAX_ALG_NAME)
+		goto err_free_inst;
+
+	inst->alg.base.cra_blocksize = BLOCKCIPHER_BLOCK_SIZE;
+	// Room for two exported POLYVAL states behind the tfm context;
+	// presumably the two precomputed tweak-length states -- confirm
+	// against hctr2_hashed_tweaklen().
+	inst->alg.base.cra_ctxsize = sizeof(struct hctr2_tfm_ctx) +
+				     polyval_alg->statesize * 2;
+	inst->alg.base.cra_alignmask = xctr_alg->base.cra_alignmask |
+				       polyval_alg->base.cra_alignmask;
+	/*
+	 * The hash function is called twice, so it is weighted higher than the
+	 * xctr and blockcipher.
+	 */
+	inst->alg.base.cra_priority = (2 * xctr_alg->base.cra_priority +
+				       4 * polyval_alg->base.cra_priority +
+				       blockcipher_alg->cra_priority) / 7;
+
+	inst->alg.setkey = hctr2_setkey;
+	inst->alg.encrypt = hctr2_encrypt;
+	inst->alg.decrypt = hctr2_decrypt;
+	inst->alg.init = hctr2_init_tfm;
+	inst->alg.exit = hctr2_exit_tfm;
+	inst->alg.min_keysize = crypto_skcipher_alg_min_keysize(xctr_alg);
+	inst->alg.max_keysize = crypto_skcipher_alg_max_keysize(xctr_alg);
+	inst->alg.ivsize = TWEAK_SIZE;
+
+	inst->free = hctr2_free_instance;
+
+	// The error label sits inside the failure branch so that a failed
+	// registration and the earlier gotos share one cleanup call.
+	err = skcipher_register_instance(tmpl, inst);
+	if (err) {
+err_free_inst:
+		hctr2_free_instance(inst);
+	}
+	return err;
+}
+
+/*
+ * .create for the "hctr2_base" template: takes the XCTR algorithm name from
+ * tb[1] and the POLYVAL algorithm name from tb[2].
+ *
+ * Returns the error from parsing either attribute, otherwise the result of
+ * hctr2_create_common().
+ */
+static int hctr2_create_base(struct crypto_template *tmpl, struct rtattr **tb)
+{
+	const char *xctr_name;
+	const char *polyval_name;
+
+	xctr_name = crypto_attr_alg_name(tb[1]);
+	if (IS_ERR(xctr_name))
+		return PTR_ERR(xctr_name);
+
+	polyval_name = crypto_attr_alg_name(tb[2]);
+	if (IS_ERR(polyval_name))
+		return PTR_ERR(polyval_name);
+
+	return hctr2_create_common(tmpl, tb, xctr_name, polyval_name);
+}
+
+/*
+ * .create for the "hctr2" template: takes a block cipher name from tb[1],
+ * wraps it as "xctr(<name>)" and pairs it with the "polyval" hash.
+ *
+ * Returns the attribute parsing error, -ENAMETOOLONG if the XCTR name does not
+ * fit in CRYPTO_MAX_ALG_NAME, otherwise the result of hctr2_create_common().
+ */
+static int hctr2_create(struct crypto_template *tmpl, struct rtattr **tb)
+{
+	const char *blockcipher_name;
+	char xctr_name[CRYPTO_MAX_ALG_NAME];
+
+	blockcipher_name = crypto_attr_alg_name(tb[1]);
+	if (IS_ERR(blockcipher_name))
+		return PTR_ERR(blockcipher_name);
+
+	if (snprintf(xctr_name, CRYPTO_MAX_ALG_NAME, "xctr(%s)",
+		    blockcipher_name) >= CRYPTO_MAX_ALG_NAME)
+		return -ENAMETOOLONG;
+
+	return hctr2_create_common(tmpl, tb, xctr_name, "polyval");
+}
+
+/*
+ * Templates provided by this module: "hctr2_base" takes explicit XCTR and
+ * POLYVAL algorithm names, "hctr2" takes only a block cipher name.
+ */
+static struct crypto_template hctr2_tmpls[] = {
+	{
+		/* hctr2_base(xctr_name, polyval_name) */
+		.name = "hctr2_base",
+		.create = hctr2_create_base,
+		.module = THIS_MODULE,
+	}, {
+		/* hctr2(blockcipher_name) */
+		.name = "hctr2",
+		.create = hctr2_create,
+		.module = THIS_MODULE,
+	}
+};
+
+/* Module init: register every template in hctr2_tmpls. */
+static int __init hctr2_module_init(void)
+{
+	return crypto_register_templates(hctr2_tmpls, ARRAY_SIZE(hctr2_tmpls));
+}
+
+/* Module exit: unregister every template in hctr2_tmpls. */
+static void __exit hctr2_module_exit(void)
+{
+	crypto_unregister_templates(hctr2_tmpls, ARRAY_SIZE(hctr2_tmpls));
+}
+
+subsys_initcall(hctr2_module_init);
+module_exit(hctr2_module_exit);
+
+MODULE_DESCRIPTION("HCTR2 length-preserving encryption mode");
+MODULE_LICENSE("GPL v2");
+MODULE_ALIAS_CRYPTO("hctr2");
+MODULE_IMPORT_NS(CRYPTO_INTERNAL);
diff --git a/crypto/polyval-generic.c b/crypto/polyval-generic.c
new file mode 100644
index 0000000000000..16bfa6925b31e
--- /dev/null
+++ b/crypto/polyval-generic.c
@@ -0,0 +1,245 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * POLYVAL: hash function for HCTR2.
+ *
+ * Copyright (c) 2007 Nokia Siemens Networks - Mikko Herranen <mh1@iki.fi>
+ * Copyright (c) 2009 Intel Corp.
+ *   Author: Huang Ying <ying.huang@intel.com>
+ * Copyright 2021 Google LLC
+ */
+
+/*
+ * Code based on crypto/ghash-generic.c
+ *
+ * POLYVAL is a keyed hash function similar to GHASH. POLYVAL uses a different
+ * modulus for finite field multiplication which makes hardware accelerated
+ * implementations on little-endian machines faster. POLYVAL is used in the
+ * kernel to implement HCTR2, but was originally specified for AES-GCM-SIV
+ * (RFC 8452).
+ *
+ * For more information see:
+ * Length-preserving encryption with HCTR2:
+ *   https://eprint.iacr.org/2021/1441.pdf
+ * AES-GCM-SIV: Nonce Misuse-Resistant Authenticated Encryption:
+ *   https://datatracker.ietf.org/doc/html/rfc8452
+ *
+ * Like GHASH, POLYVAL is not a cryptographic hash function and should
+ * not be used outside of crypto modes explicitly designed to use POLYVAL.
+ *
+ * This implementation uses a convenient trick involving the GHASH and POLYVAL
+ * fields. This trick allows multiplication in the POLYVAL field to be
+ * implemented by using multiplication in the GHASH field as a subroutine. An
+ * element of the POLYVAL field can be converted to an element of the GHASH
+ * field by computing x*REVERSE(a), where REVERSE reverses the byte-ordering of
+ * a. Similarly, an element of the GHASH field can be converted back to the
+ * POLYVAL field by computing REVERSE(x^{-1}*a). For more information, see:
+ * https://datatracker.ietf.org/doc/html/rfc8452#appendix-A
+ *
+ * By using this trick, we do not need to implement the POLYVAL field for the
+ * generic implementation.
+ *
+ * Warning: this generic implementation is not intended to be used in practice
+ * and is not constant time. For practical use, a hardware accelerated
+ * implementation of POLYVAL should be used instead.
+ *
+ */
+
+#include <asm/unaligned.h>
+#include <crypto/algapi.h>
+#include <crypto/gf128mul.h>
+#include <crypto/polyval.h>
+#include <crypto/internal/hash.h>
+#include <linux/crypto.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+
+/* Per-transform state for polyval-generic. */
+struct polyval_tfm_ctx {
+	/* Lookup table built from the key by polyval_setkey(); freed on exit */
+	struct gf128mul_4k *gf128;
+};
+
+/* Per-hash-operation state for polyval-generic. */
+struct polyval_desc_ctx {
+	/*
+	 * Running accumulator, viewed either as raw bytes or as a be128 for
+	 * the gf128mul calls.
+	 */
+	union {
+		u8 buffer[POLYVAL_BLOCK_SIZE];
+		be128 buffer128;
+	};
+	/* Bytes still needed to complete a partial block; 0 if none pending */
+	u32 bytes;
+};
+
+/*
+ * Copy the POLYVAL_BLOCK_SIZE bytes at @src to @dst in reverse byte order.
+ *
+ * Both 64-bit halves are loaded before either is stored.
+ */
+static void copy_and_reverse(u8 dst[POLYVAL_BLOCK_SIZE],
+			     const u8 src[POLYVAL_BLOCK_SIZE])
+{
+	u64 a = get_unaligned((const u64 *)&src[0]);
+	u64 b = get_unaligned((const u64 *)&src[8]);
+
+	// Byte-swap each half and exchange their positions.
+	put_unaligned(swab64(a), (u64 *)&dst[8]);
+	put_unaligned(swab64(b), (u64 *)&dst[0]);
+}
+
+/*
+ * Performs multiplication in the POLYVAL field using the GHASH field as a
+ * subroutine.  This function is used as a fallback for hardware accelerated
+ * implementations when simd registers are unavailable.
+ *
+ * @op1: first operand; overwritten with the product
+ * @op2: second operand; not modified
+ *
+ * Note: This function is not used for polyval-generic, instead we use the 4k
+ * lookup table implementation for finite field multiplication.
+ */
+void polyval_mul_non4k(u8 *op1, const u8 *op2)
+{
+	be128 a, b;
+
+	// Assume one argument is in Montgomery form and one is not.
+	copy_and_reverse((u8 *)&a, op1);
+	copy_and_reverse((u8 *)&b, op2);
+	// Convert to the GHASH field (x * REVERSE(a)), multiply there, then
+	// reverse the product back into op1.
+	gf128mul_x_lle(&a, &a);
+	gf128mul_lle(&a, &b);
+	copy_and_reverse(op1, (u8 *)&a);
+}
+EXPORT_SYMBOL_GPL(polyval_mul_non4k);
+
+/*
+ * Perform a POLYVAL update using non4k multiplication.  This function is used
+ * as a fallback for hardware accelerated implementations when simd registers
+ * are unavailable.
+ *
+ * @key:         multiplier applied after each block is absorbed
+ * @in:          input data, @nblocks blocks of POLYVAL_BLOCK_SIZE bytes
+ * @nblocks:     number of whole blocks to absorb
+ * @accumulator: running state, updated in place
+ *
+ * Note: This function is not used for polyval-generic, instead we use the 4k
+ * lookup table implementation of finite field multiplication.
+ */
+void polyval_update_non4k(const u8 *key, const u8 *in,
+			  size_t nblocks, u8 *accumulator)
+{
+	// For each block: accumulator = (accumulator ^ block) * key.
+	while (nblocks--) {
+		crypto_xor(accumulator, in, POLYVAL_BLOCK_SIZE);
+		polyval_mul_non4k(accumulator, key);
+		in += POLYVAL_BLOCK_SIZE;
+	}
+}
+EXPORT_SYMBOL_GPL(polyval_update_non4k);
+
+/*
+ * shash .setkey: convert the POLYVAL key to the GHASH field and build the 4k
+ * multiplication table for it.
+ *
+ * Returns 0 on success, -EINVAL if @keylen is not POLYVAL_BLOCK_SIZE, or
+ * -ENOMEM if the table cannot be allocated.
+ */
+static int polyval_setkey(struct crypto_shash *tfm,
+			  const u8 *key, unsigned int keylen)
+{
+	struct polyval_tfm_ctx *ctx = crypto_shash_ctx(tfm);
+	be128 k;
+
+	if (keylen != POLYVAL_BLOCK_SIZE)
+		return -EINVAL;
+
+	// Drop the table from any earlier setkey before building a new one.
+	gf128mul_free_4k(ctx->gf128);
+
+	BUILD_BUG_ON(sizeof(k) != POLYVAL_BLOCK_SIZE);
+	// GHASH-field form of the key: x * REVERSE(key).
+	copy_and_reverse((u8 *)&k, key);
+	gf128mul_x_lle(&k, &k);
+
+	ctx->gf128 = gf128mul_init_4k_lle(&k);
+	// Wipe the stack copy of the key material before returning.
+	memzero_explicit(&k, POLYVAL_BLOCK_SIZE);
+
+	if (!ctx->gf128)
+		return -ENOMEM;
+
+	return 0;
+}
+
+/*
+ * shash .init: zero the accumulator and the pending-byte count.
+ * Always returns 0.
+ */
+static int polyval_init(struct shash_desc *desc)
+{
+	struct polyval_desc_ctx *dctx = shash_desc_ctx(desc);
+
+	memset(dctx, 0, sizeof(*dctx));
+
+	return 0;
+}
+
+/*
+ * shash .update: absorb @srclen bytes from @src into the running state.
+ *
+ * dctx->buffer holds the accumulator in byte-reversed order, and dctx->bytes
+ * is the number of bytes still needed to complete a partially absorbed block
+ * (0 when no partial block is pending).  Always returns 0.
+ */
+static int polyval_update(struct shash_desc *desc,
+			 const u8 *src, unsigned int srclen)
+{
+	struct polyval_desc_ctx *dctx = shash_desc_ctx(desc);
+	const struct polyval_tfm_ctx *ctx = crypto_shash_ctx(desc->tfm);
+	u8 *pos;
+	u8 tmp[POLYVAL_BLOCK_SIZE];
+	int n;
+
+	// First top up a partial block left over from an earlier call.
+	if (dctx->bytes) {
+		n = min(srclen, dctx->bytes);
+		// Partial input is XORed in from the high end of the buffer
+		// downwards, matching the reversal applied to full blocks.
+		pos = dctx->buffer + dctx->bytes - 1;
+
+		dctx->bytes -= n;
+		srclen -= n;
+
+		while (n--)
+			*pos-- ^= *src++;
+
+		// Multiply only once the block has been completed.
+		if (!dctx->bytes)
+			gf128mul_4k_lle(&dctx->buffer128, ctx->gf128);
+	}
+
+	// Whole blocks: reverse, XOR into the accumulator, multiply.
+	while (srclen >= POLYVAL_BLOCK_SIZE) {
+		copy_and_reverse(tmp, src);
+		crypto_xor(dctx->buffer, tmp, POLYVAL_BLOCK_SIZE);
+		gf128mul_4k_lle(&dctx->buffer128, ctx->gf128);
+		src += POLYVAL_BLOCK_SIZE;
+		srclen -= POLYVAL_BLOCK_SIZE;
+	}
+
+	// Trailing bytes start a new partial block; record how many are
+	// still missing and defer the multiplication.
+	if (srclen) {
+		dctx->bytes = POLYVAL_BLOCK_SIZE - srclen;
+		pos = dctx->buffer + POLYVAL_BLOCK_SIZE - 1;
+		while (srclen--)
+			*pos-- ^= *src++;
+	}
+
+	return 0;
+}
+
+/*
+ * shash .final: finish any pending partial block and write the digest to
+ * @dst in normal (un-reversed) byte order.  Always returns 0.
+ */
+static int polyval_final(struct shash_desc *desc, u8 *dst)
+{
+	struct polyval_desc_ctx *dctx = shash_desc_ctx(desc);
+	const struct polyval_tfm_ctx *ctx = crypto_shash_ctx(desc->tfm);
+
+	// A pending partial block is multiplied as-is; the bytes that were
+	// never supplied contributed nothing to the XOR.
+	if (dctx->bytes)
+		gf128mul_4k_lle(&dctx->buffer128, ctx->gf128);
+	copy_and_reverse(dst, dctx->buffer);
+	return 0;
+}
+
+/* .cra_exit: free the multiplication table built by polyval_setkey(). */
+static void polyval_exit_tfm(struct crypto_tfm *tfm)
+{
+	struct polyval_tfm_ctx *ctx = crypto_tfm_ctx(tfm);
+
+	gf128mul_free_4k(ctx->gf128);
+}
+
+/*
+ * Algorithm descriptor: registers this implementation as "polyval" with
+ * driver name "polyval-generic" at priority 100.
+ */
+static struct shash_alg polyval_alg = {
+	.digestsize	= POLYVAL_DIGEST_SIZE,
+	.init		= polyval_init,
+	.update		= polyval_update,
+	.final		= polyval_final,
+	.setkey		= polyval_setkey,
+	.descsize	= sizeof(struct polyval_desc_ctx),
+	.base		= {
+		.cra_name		= "polyval",
+		.cra_driver_name	= "polyval-generic",
+		.cra_priority		= 100,
+		.cra_blocksize		= POLYVAL_BLOCK_SIZE,
+		.cra_ctxsize		= sizeof(struct polyval_tfm_ctx),
+		.cra_module		= THIS_MODULE,
+		.cra_exit		= polyval_exit_tfm,
+	},
+};
+
+/* Module init: register the polyval-generic shash algorithm. */
+static int __init polyval_mod_init(void)
+{
+	return crypto_register_shash(&polyval_alg);
+}
+
+/* Module exit: unregister the polyval-generic shash algorithm. */
+static void __exit polyval_mod_exit(void)
+{
+	crypto_unregister_shash(&polyval_alg);
+}
+
+subsys_initcall(polyval_mod_init);
+module_exit(polyval_mod_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("POLYVAL hash function");
+MODULE_ALIAS_CRYPTO("polyval");
+MODULE_ALIAS_CRYPTO("polyval-generic");
diff --git a/crypto/rsa.c b/crypto/rsa.c
index 39e04176b04b2..0e555ee4addb7 100644
--- a/crypto/rsa.c
+++ b/crypto/rsa.c
@@ -17,6 +17,11 @@ struct rsa_mpi_key {
 	MPI n;
 	MPI e;
 	MPI d;
+	MPI p;
+	MPI q;
+	MPI dp;
+	MPI dq;
+	MPI qinv;
 };
 
 /*
@@ -35,16 +40,49 @@ static int _rsa_enc(const struct rsa_mpi_key *key, MPI c, MPI m)
 
 /*
  * RSADP function [RFC3447 sec 5.1.2]
- * m = c^d mod n;
+ * m_1 = c^dP mod p;
+ * m_2 = c^dQ mod q;
+ * h = (m_1 - m_2) * qInv mod p;
+ * m = m_2 + q * h;
  */
-static int _rsa_dec(const struct rsa_mpi_key *key, MPI m, MPI c)
+static int _rsa_dec_crt(const struct rsa_mpi_key *key, MPI m_or_m1_or_h, MPI c)
 {
+	MPI m2, m12_or_qh;
+	int ret = -ENOMEM;
+
 	/* (1) Validate 0 <= c < n */
 	if (mpi_cmp_ui(c, 0) < 0 || mpi_cmp(c, key->n) >= 0)
 		return -EINVAL;
 
-	/* (2) m = c^d mod n */
-	return mpi_powm(m, c, key->d, key->n);
+	m2 = mpi_alloc(0);
+	m12_or_qh = mpi_alloc(0);
+	if (!m2 || !m12_or_qh)
+		goto err_free_mpi;
+
+	/* (2i) m_1 = c^dP mod p */
+	ret = mpi_powm(m_or_m1_or_h, c, key->dp, key->p);
+	if (ret)
+		goto err_free_mpi;
+
+	/* (2ii) m_2 = c^dQ mod q */
+	ret = mpi_powm(m2, c, key->dq, key->q);
+	if (ret)
+		goto err_free_mpi;
+
+	/* (2iii) h = (m_1 - m_2) * qInv mod p */
+	mpi_sub(m12_or_qh, m_or_m1_or_h, m2);
+	mpi_mulm(m_or_m1_or_h, m12_or_qh, key->qinv, key->p);
+
+	/* (2iv) m = m_2 + q * h */
+	mpi_mul(m12_or_qh, key->q, m_or_m1_or_h);
+	mpi_addm(m_or_m1_or_h, m2, m12_or_qh, key->n);
+
+	ret = 0;
+
+err_free_mpi:
+	mpi_free(m12_or_qh);
+	mpi_free(m2);
+	return ret;
 }
 
 static inline struct rsa_mpi_key *rsa_get_key(struct crypto_akcipher *tfm)
@@ -112,7 +150,7 @@ static int rsa_dec(struct akcipher_request *req)
 	if (!c)
 		goto err_free_m;
 
-	ret = _rsa_dec(pkey, m, c);
+	ret = _rsa_dec_crt(pkey, m, c);
 	if (ret)
 		goto err_free_c;
 
@@ -134,9 +172,19 @@ static void rsa_free_mpi_key(struct rsa_mpi_key *key)
 	mpi_free(key->d);
 	mpi_free(key->e);
 	mpi_free(key->n);
+	mpi_free(key->p);
+	mpi_free(key->q);
+	mpi_free(key->dp);
+	mpi_free(key->dq);
+	mpi_free(key->qinv);
 	key->d = NULL;
 	key->e = NULL;
 	key->n = NULL;
+	key->p = NULL;
+	key->q = NULL;
+	key->dp = NULL;
+	key->dq = NULL;
+	key->qinv = NULL;
 }
 
 static int rsa_check_key_length(unsigned int len)
@@ -217,6 +265,26 @@ static int rsa_set_priv_key(struct crypto_akcipher *tfm, const void *key,
 	if (!mpi_key->n)
 		goto err;
 
+	mpi_key->p = mpi_read_raw_data(raw_key.p, raw_key.p_sz);
+	if (!mpi_key->p)
+		goto err;
+
+	mpi_key->q = mpi_read_raw_data(raw_key.q, raw_key.q_sz);
+	if (!mpi_key->q)
+		goto err;
+
+	mpi_key->dp = mpi_read_raw_data(raw_key.dp, raw_key.dp_sz);
+	if (!mpi_key->dp)
+		goto err;
+
+	mpi_key->dq = mpi_read_raw_data(raw_key.dq, raw_key.dq_sz);
+	if (!mpi_key->dq)
+		goto err;
+
+	mpi_key->qinv = mpi_read_raw_data(raw_key.qinv, raw_key.qinv_sz);
+	if (!mpi_key->qinv)
+		goto err;
+
 	if (rsa_check_key_length(mpi_get_size(mpi_key->n) << 3)) {
 		rsa_free_mpi_key(mpi_key);
 		return -EINVAL;
diff --git a/crypto/tcrypt.c b/crypto/tcrypt.c
index 2bacf8384f59f..a8831060c4cee 100644
--- a/crypto/tcrypt.c
+++ b/crypto/tcrypt.c
@@ -1556,6 +1556,7 @@ static int do_test(const char *alg, u32 type, u32 mask, int m, u32 num_mb)
 		ret += tcrypt_test("rfc3686(ctr(aes))");
 		ret += tcrypt_test("ofb(aes)");
 		ret += tcrypt_test("cfb(aes)");
+		ret += tcrypt_test("xctr(aes)");
 		break;
 
 	case 11:
@@ -1669,10 +1670,6 @@ static int do_test(const char *alg, u32 type, u32 mask, int m, u32 num_mb)
 		ret += tcrypt_test("rmd160");
 		break;
 
-	case 41:
-		ret += tcrypt_test("blake2s-256");
-		break;
-
 	case 42:
 		ret += tcrypt_test("blake2b-512");
 		break;
@@ -1729,6 +1726,10 @@ static int do_test(const char *alg, u32 type, u32 mask, int m, u32 num_mb)
 		ret += tcrypt_test("ccm(sm4)");
 		break;
 
+	case 57:
+		ret += tcrypt_test("polyval");
+		break;
+
 	case 100:
 		ret += tcrypt_test("hmac(md5)");
 		break;
@@ -2186,6 +2187,11 @@ static int do_test(const char *alg, u32 type, u32 mask, int m, u32 num_mb)
 				   16, 16, aead_speed_template_19, num_mb);
 		break;
 
+	case 226:
+		test_cipher_speed("hctr2(aes)", ENCRYPT, sec, NULL,
+				  0, speed_template_32);
+		break;
+
 	case 300:
 		if (alg) {
 			test_hash_speed(alg, sec, generic_hash_speed_template);
@@ -2240,10 +2246,6 @@ static int do_test(const char *alg, u32 type, u32 mask, int m, u32 num_mb)
 		test_hash_speed("rmd160", sec, generic_hash_speed_template);
 		if (mode > 300 && mode < 400) break;
 		fallthrough;
-	case 316:
-		test_hash_speed("blake2s-256", sec, generic_hash_speed_template);
-		if (mode > 300 && mode < 400) break;
-		fallthrough;
 	case 317:
 		test_hash_speed("blake2b-512", sec, generic_hash_speed_template);
 		if (mode > 300 && mode < 400) break;
@@ -2352,10 +2354,6 @@ static int do_test(const char *alg, u32 type, u32 mask, int m, u32 num_mb)
 		test_ahash_speed("rmd160", sec, generic_hash_speed_template);
 		if (mode > 400 && mode < 500) break;
 		fallthrough;
-	case 416:
-		test_ahash_speed("blake2s-256", sec, generic_hash_speed_template);
-		if (mode > 400 && mode < 500) break;
-		fallthrough;
 	case 417:
 		test_ahash_speed("blake2b-512", sec, generic_hash_speed_template);
 		if (mode > 400 && mode < 500) break;
diff --git a/crypto/testmgr.c b/crypto/testmgr.c
index 5801a8f9f7134..7a8a567499603 100644
--- a/crypto/testmgr.c
+++ b/crypto/testmgr.c
@@ -4376,30 +4376,6 @@ static const struct alg_test_desc alg_test_descs[] = {
 			.hash = __VECS(blake2b_512_tv_template)
 		}
 	}, {
-		.alg = "blake2s-128",
-		.test = alg_test_hash,
-		.suite = {
-			.hash = __VECS(blakes2s_128_tv_template)
-		}
-	}, {
-		.alg = "blake2s-160",
-		.test = alg_test_hash,
-		.suite = {
-			.hash = __VECS(blakes2s_160_tv_template)
-		}
-	}, {
-		.alg = "blake2s-224",
-		.test = alg_test_hash,
-		.suite = {
-			.hash = __VECS(blakes2s_224_tv_template)
-		}
-	}, {
-		.alg = "blake2s-256",
-		.test = alg_test_hash,
-		.suite = {
-			.hash = __VECS(blakes2s_256_tv_template)
-		}
-	}, {
 		.alg = "cbc(aes)",
 		.test = alg_test_skcipher,
 		.fips_allowed = 1,
@@ -5089,6 +5065,14 @@ static const struct alg_test_desc alg_test_descs[] = {
 			.hash = __VECS(ghash_tv_template)
 		}
 	}, {
+		.alg = "hctr2(aes)",
+		.generic_driver =
+		    "hctr2_base(xctr(aes-generic),polyval-generic)",
+		.test = alg_test_skcipher,
+		.suite = {
+			.cipher = __VECS(aes_hctr2_tv_template)
+		}
+	}, {
 		.alg = "hmac(md5)",
 		.test = alg_test_hash,
 		.suite = {
@@ -5343,6 +5327,12 @@ static const struct alg_test_desc alg_test_descs[] = {
 			.hash = __VECS(poly1305_tv_template)
 		}
 	}, {
+		.alg = "polyval",
+		.test = alg_test_hash,
+		.suite = {
+			.hash = __VECS(polyval_tv_template)
+		}
+	}, {
 		.alg = "rfc3686(ctr(aes))",
 		.test = alg_test_skcipher,
 		.fips_allowed = 1,
@@ -5549,6 +5539,12 @@ static const struct alg_test_desc alg_test_descs[] = {
 			.cipher = __VECS(xchacha20_tv_template)
 		},
 	}, {
+		.alg = "xctr(aes)",
+		.test = alg_test_skcipher,
+		.suite = {
+			.cipher = __VECS(aes_xctr_tv_template)
+		}
+	}, {
 		.alg = "xts(aes)",
 		.generic_driver = "xts(ecb(aes-generic))",
 		.test = alg_test_skcipher,
diff --git a/crypto/testmgr.h b/crypto/testmgr.h
index 4d7449fc6a655..4f3955ea40bf6 100644
--- a/crypto/testmgr.h
+++ b/crypto/testmgr.h
@@ -34034,221 +34034,1540 @@ static const struct hash_testvec blake2b_512_tv_template[] = {{
 			  0xae, 0x15, 0x81, 0x15, 0xd0, 0x88, 0xa0, 0x3c, },
 }};
 
-static const struct hash_testvec blakes2s_128_tv_template[] = {{
-	.digest = (u8[]){ 0x64, 0x55, 0x0d, 0x6f, 0xfe, 0x2c, 0x0a, 0x01,
-			  0xa1, 0x4a, 0xba, 0x1e, 0xad, 0xe0, 0x20, 0x0c, },
-}, {
-	.plaintext = blake2_ordered_sequence,
-	.psize = 64,
-	.digest = (u8[]){ 0xdc, 0x66, 0xca, 0x8f, 0x03, 0x86, 0x58, 0x01,
-			  0xb0, 0xff, 0xe0, 0x6e, 0xd8, 0xa1, 0xa9, 0x0e, },
-}, {
-	.ksize = 16,
-	.key = blake2_ordered_sequence,
-	.plaintext = blake2_ordered_sequence,
-	.psize = 1,
-	.digest = (u8[]){ 0x88, 0x1e, 0x42, 0xe7, 0xbb, 0x35, 0x80, 0x82,
-			  0x63, 0x7c, 0x0a, 0x0f, 0xd7, 0xec, 0x6c, 0x2f, },
-}, {
-	.ksize = 32,
-	.key = blake2_ordered_sequence,
-	.plaintext = blake2_ordered_sequence,
-	.psize = 7,
-	.digest = (u8[]){ 0xcf, 0x9e, 0x07, 0x2a, 0xd5, 0x22, 0xf2, 0xcd,
-			  0xa2, 0xd8, 0x25, 0x21, 0x80, 0x86, 0x73, 0x1c, },
-}, {
-	.ksize = 1,
-	.key = "B",
-	.plaintext = blake2_ordered_sequence,
-	.psize = 15,
-	.digest = (u8[]){ 0xf6, 0x33, 0x5a, 0x2c, 0x22, 0xa0, 0x64, 0xb2,
-			  0xb6, 0x3f, 0xeb, 0xbc, 0xd1, 0xc3, 0xe5, 0xb2, },
-}, {
-	.ksize = 16,
-	.key = blake2_ordered_sequence,
-	.plaintext = blake2_ordered_sequence,
-	.psize = 247,
-	.digest = (u8[]){ 0x72, 0x66, 0x49, 0x60, 0xf9, 0x4a, 0xea, 0xbe,
-			  0x1f, 0xf4, 0x60, 0xce, 0xb7, 0x81, 0xcb, 0x09, },
-}, {
-	.ksize = 32,
-	.key = blake2_ordered_sequence,
-	.plaintext = blake2_ordered_sequence,
-	.psize = 256,
-	.digest = (u8[]){ 0xd5, 0xa4, 0x0e, 0xc3, 0x16, 0xc7, 0x51, 0xa6,
-			  0x3c, 0xd0, 0xd9, 0x11, 0x57, 0xfa, 0x1e, 0xbb, },
-}};
+/*
+ * Test vectors generated using https://github.com/google/hctr2
+ */
+static const struct cipher_testvec aes_xctr_tv_template[] = {
+	{
+		.key	= "\x9c\x8d\xc4\xbd\x71\x36\xdc\x82"
+			  "\x7c\xa1\xca\xa3\x23\x5a\xdb\xa4",
+		.iv	= "\x8d\xe7\xa5\x6a\x95\x86\x42\xde"
+			  "\xba\xea\x6e\x69\x03\x33\x86\x0f",
+		.ptext	= "\xbd",
+		.ctext	= "\xb9",
+		.klen	= 16,
+		.len	= 1,
+	},
+	{
+		.key	= "\xbc\x1b\x12\x0c\x3f\x18\xcc\x1f"
+			  "\x5a\x1d\xab\x81\xa8\x68\x7c\x63",
+		.iv	= "\x22\xc1\xdd\x25\x0b\x18\xcb\xa5"
+			  "\x4a\xda\x15\x07\x73\xd9\x88\x10",
+		.ptext	= "\x24\x6e\x64\xc6\x15\x26\x9c\xda"
+			  "\x2a\x4b\x57\x12\xff\x7c\xd6\xb5",
+		.ctext	= "\xd6\x47\x8d\x58\x92\xb2\x84\xf9"
+			  "\xb7\xee\x0d\x98\xa1\x39\x4d\x8f",
+		.klen	= 16,
+		.len	= 16,
+	},
+	{
+		.key	= "\x44\x03\xbf\x4c\x30\xf0\xa7\xd6"
+			  "\xbd\x54\xbb\x66\x8e\xa6\x0e\x8a",
+		.iv	= "\xe6\xf7\x26\xdf\x8c\x3c\xaa\x88"
+			  "\xce\xc1\xbd\x43\x3b\x09\x62\xad",
+		.ptext	= "\x3c\xe3\x46\xb9\x8f\x9d\x3f\x8d"
+			  "\xef\xf2\x53\xab\x24\xe2\x29\x08"
+			  "\xf8\x7e\x1d\xa6\x6d\x86\x7d\x60"
+			  "\x97\x63\x93\x29\x71\x94\xb4",
+		.ctext	= "\xd4\xa3\xc6\xb8\xc1\x6f\x70\x1a"
+			  "\x52\x0c\xed\x4c\xaf\x51\x56\x23"
+			  "\x48\x45\x07\x10\x34\xc5\xba\x71"
+			  "\xe5\xf8\x1e\xd8\xcb\xa6\xe7",
+		.klen	= 16,
+		.len	= 31,
+	},
+	{
+		.key	= "\x5b\x17\x30\x94\x19\x31\xa1\xae"
+			  "\x24\x8e\x42\x1e\x82\xe6\xec\xb8",
+		.iv	= "\xd1\x2e\xb9\xb8\xf8\x49\xeb\x68"
+			  "\x06\xeb\x65\x33\x34\xa2\xeb\xf0",
+		.ptext	= "\x19\x75\xec\x59\x60\x1b\x7a\x3e"
+			  "\x62\x46\x87\xf0\xde\xab\x81\x36"
+			  "\x63\x53\x11\xa0\x1f\xce\x25\x85"
+			  "\x49\x6b\x28\xfa\x1c\x92\xe5\x18"
+			  "\x38\x14\x00\x79\xf2\x9e\xeb\xfc"
+			  "\x36\xa7\x6b\xe1\xe5\xcf\x04\x48"
+			  "\x44\x6d\xbd\x64\xb3\xcb\x78\x05"
+			  "\x8d\x7f\x9a\xaf\x3c\xcf\x6c\x45"
+			  "\x6c\x7c\x46\x4c\xa8\xc0\x1e\xe4"
+			  "\x33\xa5\x7b\xbb\x26\xd9\xc0\x32"
+			  "\x9d\x8a\xb3\xf3\x3d\x52\xe6\x48"
+			  "\x4c\x9b\x4c\x6e\xa4\xa3\xad\x66"
+			  "\x56\x48\xd5\x98\x3a\x93\xc4\x85"
+			  "\xe9\x89\xca\xa6\xc1\xc8\xe7\xf8"
+			  "\xc3\xe9\xef\xbe\x77\xe6\xd1\x3a"
+			  "\xa6\x99\xc8\x2d\xdf\x40\x0f\x44",
+		.ctext	= "\xc6\x1a\x01\x1a\x00\xba\x04\xff"
+			  "\x10\xd1\x7e\x5d\xad\x91\xde\x8c"
+			  "\x08\x55\x95\xae\xd7\x22\x77\x40"
+			  "\xf0\x33\x1b\x51\xef\xfe\x3d\x67"
+			  "\xdf\xc4\x9f\x39\x47\x67\x93\xab"
+			  "\xaa\x37\x55\xfe\x41\xe0\xba\xcd"
+			  "\x25\x02\x7c\x61\x51\xa1\xcc\x72"
+			  "\x7a\x20\x26\xb9\x06\x68\xbd\x19"
+			  "\xc5\x2e\x1b\x75\x4a\x40\xb2\xd2"
+			  "\xc4\xee\xd8\x5b\xa4\x55\x7d\x25"
+			  "\xfc\x01\x4d\x6f\x0a\xfd\x37\x5d"
+			  "\x3e\x67\xc0\x35\x72\x53\x7b\xe2"
+			  "\xd6\x19\x5b\x92\x6c\x3a\x8c\x2a"
+			  "\xe2\xc2\xa2\x4f\x2a\xf2\xb5\x15"
+			  "\x65\xc5\x8d\x97\xf9\xbf\x8c\x98"
+			  "\xe4\x50\x1a\xf2\x76\x55\x07\x49",
+		.klen	= 16,
+		.len	= 128,
+	},
+	{
+		.key	= "\x17\xa6\x01\x3d\x5d\xd6\xef\x2d"
+			  "\x69\x8f\x4c\x54\x5b\xae\x43\xf0",
+		.iv	= "\xa9\x1b\x47\x60\x26\x82\xf7\x1c"
+			  "\x80\xf8\x88\xdd\xfb\x44\xd9\xda",
+		.ptext	= "\xf7\x67\xcd\xa6\x04\x65\x53\x99"
+			  "\x90\x5c\xa2\x56\x74\xd7\x9d\xf2"
+			  "\x0b\x03\x7f\x4e\xa7\x84\x72\x2b"
+			  "\xf0\xa5\xbf\xe6\x9a\x62\x3a\xfe"
+			  "\x69\x5c\x93\x79\x23\x86\x64\x85"
+			  "\xeb\x13\xb1\x5a\xd5\x48\x39\xa0"
+			  "\x70\xfb\x06\x9a\xd7\x12\x5a\xb9"
+			  "\xbe\xed\x2c\x81\x64\xf7\xcf\x80"
+			  "\xee\xe6\x28\x32\x2d\x37\x4c\x32"
+			  "\xf4\x1f\x23\x21\xe9\xc8\xc9\xbf"
+			  "\x54\xbc\xcf\xb4\xc2\x65\x39\xdf"
+			  "\xa5\xfb\x14\x11\xed\x62\x38\xcf"
+			  "\x9b\x58\x11\xdd\xe9\xbd\x37\x57"
+			  "\x75\x4c\x9e\xd5\x67\x0a\x48\xc6"
+			  "\x0d\x05\x4e\xb1\x06\xd7\xec\x2e"
+			  "\x9e\x59\xde\x4f\xab\x38\xbb\xe5"
+			  "\x87\x04\x5a\x2c\x2a\xa2\x8f\x3c"
+			  "\xe7\xe1\x46\xa9\x49\x9f\x24\xad"
+			  "\x2d\xb0\x55\x40\x64\xd5\xda\x7e"
+			  "\x1e\x77\xb8\x29\x72\x73\xc3\x84"
+			  "\xcd\xf3\x94\x90\x58\x76\xc9\x2c"
+			  "\x2a\xad\x56\xde\x33\x18\xb6\x3b"
+			  "\x10\xe9\xe9\x8d\xf0\xa9\x7f\x05"
+			  "\xf7\xb5\x8c\x13\x7e\x11\x3d\x1e"
+			  "\x02\xbb\x5b\xea\x69\xff\x85\xcf"
+			  "\x6a\x18\x97\x45\xe3\x96\xba\x4d"
+			  "\x2d\x7a\x70\x78\x15\x2c\xe9\xdc"
+			  "\x4e\x09\x92\x57\x04\xd8\x0b\xa6"
+			  "\x20\x71\x76\x47\x76\x96\x89\xa0"
+			  "\xd9\x29\xa2\x5a\x06\xdb\x56\x39"
+			  "\x60\x33\x59\x04\x95\x89\xf6\x18"
+			  "\x1d\x70\x75\x85\x3a\xb7\x6e",
+		.ctext	= "\xe1\xe7\x3f\xd3\x6a\xb9\x2f\x64"
+			  "\x37\xc5\xa4\xe9\xca\x0a\xa1\xd6"
+			  "\xea\x7d\x39\xe5\xe6\xcc\x80\x54"
+			  "\x74\x31\x2a\x04\x33\x79\x8c\x8e"
+			  "\x4d\x47\x84\x28\x27\x9b\x3c\x58"
+			  "\x54\x58\x20\x4f\x70\x01\x52\x5b"
+			  "\xac\x95\x61\x49\x5f\xef\xba\xce"
+			  "\xd7\x74\x56\xe7\xbb\xe0\x3c\xd0"
+			  "\x7f\xa9\x23\x57\x33\x2a\xf6\xcb"
+			  "\xbe\x42\x14\x95\xa8\xf9\x7a\x7e"
+			  "\x12\x53\x3a\xe2\x13\xfe\x2d\x89"
+			  "\xeb\xac\xd7\xa8\xa5\xf8\x27\xf3"
+			  "\x74\x9a\x65\x63\xd1\x98\x3a\x7e"
+			  "\x27\x7b\xc0\x20\x00\x4d\xf4\xe5"
+			  "\x7b\x69\xa6\xa8\x06\x50\x85\xb6"
+			  "\x7f\xac\x7f\xda\x1f\xf5\x37\x56"
+			  "\x9b\x2f\xd3\x86\x6b\x70\xbd\x0e"
+			  "\x55\x9a\x9d\x4b\x08\xb5\x5b\x7b"
+			  "\xd4\x7c\xb4\x71\x49\x92\x4a\x1e"
+			  "\xed\x6d\x11\x09\x47\x72\x32\x6a"
+			  "\x97\x53\x36\xaf\xf3\x06\x06\x2c"
+			  "\x69\xf1\x59\x00\x36\x95\x28\x2a"
+			  "\xb6\xcd\x10\x21\x84\x73\x5c\x96"
+			  "\x86\x14\x2c\x3d\x02\xdb\x53\x9a"
+			  "\x61\xde\xea\x99\x84\x7a\x27\xf6"
+			  "\xf7\xc8\x49\x73\x4b\xb8\xeb\xd3"
+			  "\x41\x33\xdd\x09\x68\xe2\x64\xb8"
+			  "\x5f\x75\x74\x97\x91\x54\xda\xc2"
+			  "\x73\x2c\x1e\x5a\x84\x48\x01\x1a"
+			  "\x0d\x8b\x0a\xdf\x07\x2e\xee\x77"
+			  "\x1d\x17\x41\x7a\xc9\x33\x63\xfa"
+			  "\x9f\xc3\x74\x57\x5f\x03\x4c",
+		.klen	= 16,
+		.len	= 255,
+	},
+	{
+		.key	= "\xe5\xf1\x48\x2e\x88\xdb\xc7\x28"
+			  "\xa2\x55\x5d\x2f\x90\x02\xdc\xd3"
+			  "\xf5\xd3\x9e\x87\xd5\x58\x30\x4a",
+		.iv	= "\xa6\x40\x39\xf9\x63\x6c\x2d\xd4"
+			  "\x1b\x71\x05\xa4\x88\x86\x11\xd3",
+		.ptext	= "\xb6\x06\xae\x15\x11\x96\xc1\x44"
+			  "\x44\xc2\x98\xf9\xa8\x0a\x0b",
+		.ctext	= "\x27\x3b\x68\x40\xa9\x5e\x74\x6b"
+			  "\x74\x67\x18\xf9\x37\xed\xed",
+		.klen	= 24,
+		.len	= 15,
+	},
+	{
+		.key	= "\xc8\xa0\x27\x67\x04\x3f\xed\xa5"
+			  "\xb4\x0c\x51\x91\x2d\x27\x77\x33"
+			  "\xa5\xfc\x2a\x9f\x78\xd8\x1c\x68",
+		.iv	= "\x83\x99\x1a\xe2\x84\xca\xa9\x16"
+			  "\x8d\xc4\x2d\x1b\x67\xc8\x86\x21",
+		.ptext	= "\xd6\x22\x85\xb8\x5d\x7e\x26\x2e"
+			  "\xbe\x04\x9d\x0c\x03\x91\x45\x4a"
+			  "\x36",
+		.ctext	= "\x0f\x44\xa9\x62\x72\xec\x12\x26"
+			  "\x3a\xc6\x83\x26\x62\x5e\xb7\x13"
+			  "\x05",
+		.klen	= 24,
+		.len	= 17,
+	},
+	{
+		.key	= "\xc5\x87\x18\x09\x0a\x4e\x66\x3e"
+			  "\x50\x90\x19\x93\xc0\x33\xcf\x80"
+			  "\x3a\x36\x6b\x6c\x43\xd7\xe4\x93",
+		.iv	= "\xdd\x0b\x75\x1f\xee\x2f\xb4\x52"
+			  "\x10\x82\x1f\x79\x8a\xa4\x9b\x87",
+		.ptext	= "\x56\xf9\x13\xce\x9f\x30\x10\x11"
+			  "\x1b\x59\xfd\x39\x5a\x29\xa3\x44"
+			  "\x78\x97\x8c\xf6\x99\x6d\x26\xf1"
+			  "\x32\x60\x6a\xeb\x04\x47\x29\x4c"
+			  "\x7e\x14\xef\x4d\x55\x29\xfe\x36"
+			  "\x37\xcf\x0b\x6e\xf3\xce\x15\xd2",
+		.ctext	= "\x8f\x98\xe1\x5a\x7f\xfe\xc7\x05"
+			  "\x76\xb0\xd5\xde\x90\x52\x2b\xa8"
+			  "\xf3\x6e\x3c\x77\xa5\x33\x63\xdd"
+			  "\x6f\x62\x12\xb0\x80\x10\xc1\x28"
+			  "\x58\xe5\xd6\x24\x44\x04\x55\xf3"
+			  "\x6d\x94\xcb\x2c\x7e\x7a\x85\x79",
+		.klen	= 24,
+		.len	= 48,
+	},
+	{
+		.key	= "\x84\x9b\xe8\x10\x4c\xb3\xd1\x7a"
+			  "\xb3\xab\x4e\x6f\x90\x12\x07\xf8"
+			  "\xef\xde\x42\x09\xbf\x34\x95\xb2",
+		.iv	= "\x66\x62\xf9\x48\x9d\x17\xf7\xdf"
+			  "\x06\x67\xf4\x6d\xf2\xbc\xa2\xe5",
+		.ptext	= "\x2f\xd6\x16\x6b\xf9\x4b\x44\x14"
+			  "\x90\x93\xe5\xfd\x05\xaa\x00\x26"
+			  "\xbd\xab\x11\xb8\xf0\xcb\x11\x72"
+			  "\xdd\xc5\x15\x4f\x4e\x1b\xf8\xc9"
+			  "\x8f\x4a\xd5\x69\xf8\x9e\xfb\x05"
+			  "\x8a\x37\x46\xfe\xfa\x58\x9b\x0e"
+			  "\x72\x90\x9a\x06\xa5\x42\xf4\x7c"
+			  "\x35\xd5\x64\x70\x72\x67\xfc\x8b"
+			  "\xab\x5a\x2f\x64\x9b\xa1\xec\xe7"
+			  "\xe6\x92\x69\xdb\x62\xa4\xe7\x44"
+			  "\x88\x28\xd4\x52\x64\x19\xa9\xd7"
+			  "\x0c\x00\xe6\xe7\xc1\x28\xc1\xf5"
+			  "\x72\xc5\xfa\x09\x22\x2e\xf4\x82"
+			  "\xa3\xdc\xc1\x68\xf9\x29\x55\x8d"
+			  "\x04\x67\x13\xa6\x52\x04\x3c\x0c"
+			  "\x14\xf2\x87\x23\x61\xab\x82\xcb"
+			  "\x49\x5b\x6b\xd4\x4f\x0d\xd4\x95"
+			  "\x82\xcd\xe3\x69\x47\x1b\x31\x73"
+			  "\x73\x77\xc1\x53\x7d\x43\x5e\x4a"
+			  "\x80\x3a\xca\x9c\xc7\x04\x1a\x31"
+			  "\x8e\xe6\x76\x7f\xe1\xb3\xd0\x57"
+			  "\xa2\xb2\xf6\x09\x51\xc9\x6d\xbc"
+			  "\x79\xed\x57\x50\x36\xd2\x93\xa4"
+			  "\x40\x5d\xac\x3a\x3b\xb6\x2d\x89"
+			  "\x78\xa2\xbd\x23\xec\x35\x06\xf0"
+			  "\xa8\xc8\xc9\xb0\xe3\x28\x2b\xba"
+			  "\x70\xa0\xfe\xed\x13\xc4\xd7\x90"
+			  "\xb1\x6a\xe0\xe1\x30\x71\x15\xd0"
+			  "\xe2\xb3\xa6\x4e\xb0\x01\xf9\xe7"
+			  "\x59\xc6\x1e\xed\x46\x2b\xe3\xa8"
+			  "\x22\xeb\x7f\x1c\xd9\xcd\xe0\xa6"
+			  "\x72\x42\x2c\x06\x75\xbb\xb7\x6b"
+			  "\xca\x49\x5e\xa1\x47\x8d\x9e\xfe"
+			  "\x60\xcc\x34\x95\x8e\xfa\x1e\x3e"
+			  "\x85\x4b\x03\x54\xea\x34\x1c\x41"
+			  "\x90\x45\xa6\xbe\xcf\x58\x4f\xca"
+			  "\x2c\x79\xc0\x3e\x8f\xd7\x3b\xd4"
+			  "\x55\x74\xa8\xe1\x57\x09\xbf\xab"
+			  "\x2c\xf9\xe4\xdd\x17\x99\x57\x60"
+			  "\x4b\x88\x2a\x7f\x43\x86\xb9\x9a"
+			  "\x60\xbf\x4c\xcf\x9b\x41\xb8\x99"
+			  "\x69\x15\x4f\x91\x4d\xeb\xdf\x6f"
+			  "\xcc\x4c\xf9\x6f\xf2\x33\x23\xe7"
+			  "\x02\x44\xaa\xa2\xfa\xb1\x39\xa5"
+			  "\xff\x88\xf5\x37\x02\x33\x24\xfc"
+			  "\x79\x11\x4c\x94\xc2\x31\x87\x9c"
+			  "\x53\x19\x99\x32\xe4\xde\x18\xf4"
+			  "\x8f\xe2\xe8\xa3\xfb\x0b\xaa\x7c"
+			  "\xdb\x83\x0f\xf6\xc0\x8a\x9b\xcd"
+			  "\x7b\x16\x05\x5b\xe4\xb4\x34\x03"
+			  "\xe3\x8f\xc9\x4b\x56\x84\x2a\x4c"
+			  "\x36\x72\x3c\x84\x4f\xba\xa2\x7f"
+			  "\xf7\x1b\xba\x4d\x8a\xb8\x5d\x51"
+			  "\x36\xfb\xef\x23\x18\x6f\x33\x2d"
+			  "\xbb\x06\x24\x8e\x33\x98\x6e\xcd"
+			  "\x63\x11\x18\x6b\xcc\x1b\x66\xb9"
+			  "\x38\x8d\x06\x8d\x98\x1a\xef\xaa"
+			  "\x35\x4a\x90\xfa\xb1\xd3\xcc\x11"
+			  "\x50\x4c\x54\x18\x60\x5d\xe4\x11"
+			  "\xfc\x19\xe1\x53\x20\x5c\xe7\xef"
+			  "\x8a\x2b\xa8\x82\x51\x5f\x5d\x43"
+			  "\x34\xe5\xcf\x7b\x1b\x6f\x81\x19"
+			  "\xb7\xdf\xa8\x9e\x81\x89\x5f\x33"
+			  "\x69\xaf\xde\x89\x68\x88\xf0\x71",
+		.ctext	= "\xab\x15\x46\x5b\xed\x4f\xa8\xac"
+			  "\xbf\x31\x30\x84\x55\xa4\xb8\x98"
+			  "\x79\xba\xa0\x15\xa4\x55\x20\xec"
+			  "\xf9\x94\x71\xe6\x6a\x6f\xee\x87"
+			  "\x2e\x3a\xa2\x95\xae\x6e\x56\x09"
+			  "\xe9\xc0\x0f\xe2\xc6\xb7\x30\xa9"
+			  "\x73\x8e\x59\x7c\xfd\xe3\x71\xf7"
+			  "\xae\x8b\x91\xab\x5e\x36\xe9\xa8"
+			  "\xff\x17\xfa\xa2\x94\x93\x11\x42"
+			  "\x67\x96\x99\xc5\xf0\xad\x2a\x57"
+			  "\xf9\xa6\x70\x4a\xdf\x71\xff\xc0"
+			  "\xe2\xaf\x9a\xae\x57\x58\x13\x3b"
+			  "\x2d\xf1\xc7\x8f\xdb\x8a\xcc\xce"
+			  "\x53\x1a\x69\x55\x39\xc8\xbe\xc3"
+			  "\x2d\xb1\x03\xd9\xa3\x99\xf4\x8d"
+			  "\xd9\x2d\x27\xae\xa5\xe7\x77\x7f"
+			  "\xbb\x88\x84\xea\xfa\x19\x3f\x44"
+			  "\x61\x21\x8a\x1f\xbe\xac\x60\xb4"
+			  "\xaf\xe9\x00\xab\xef\x3c\x53\x56"
+			  "\xcd\x4b\x53\xd8\x9b\xfe\x88\x23"
+			  "\x5b\x85\x76\x08\xec\xd1\x6e\x4a"
+			  "\x87\xa4\x7d\x29\x4e\x4f\x3f\xc9"
+			  "\xa4\xab\x63\xea\xdd\xef\x9f\x79"
+			  "\x38\x18\x7d\x90\x90\xf9\x12\x57"
+			  "\x1d\x89\xea\xfe\xd4\x47\x45\x32"
+			  "\x6a\xf6\xe7\xde\x22\x7e\xee\xc1"
+			  "\xbc\x2d\xc3\xbb\xe5\xd4\x13\xac"
+			  "\x63\xff\x5b\xb1\x05\x96\xd5\xf3"
+			  "\x07\x9a\x62\xb6\x30\xea\x7d\x1e"
+			  "\xee\x75\x0a\x1b\xcc\x6e\x4d\xa7"
+			  "\xf7\x4d\x74\xd8\x60\x32\x5e\xd0"
+			  "\x93\xd7\x19\x90\x4e\x26\xdb\xe4"
+			  "\x5e\xd4\xa8\xb9\x76\xba\x56\x91"
+			  "\xc4\x75\x04\x1e\xc2\x77\x24\x6f"
+			  "\xf9\xe8\x4a\xec\x7f\x86\x95\xb3"
+			  "\x5c\x2c\x97\xab\xf0\xf7\x74\x5b"
+			  "\x0b\xc2\xda\x42\x40\x34\x16\xed"
+			  "\x06\xc1\x25\x53\x17\x0d\x81\x4e"
+			  "\xe6\xf2\x0f\x6d\x94\x3c\x90\x7a"
+			  "\xae\x20\xe9\x3f\xf8\x18\x67\x6a"
+			  "\x49\x1e\x41\xb6\x46\xab\xc8\xa7"
+			  "\xcb\x19\x96\xf5\x99\xc0\x66\x3e"
+			  "\x77\xcf\x73\x52\x83\x2a\xe2\x48"
+			  "\x27\x6c\xeb\xe7\xe7\xc4\xd5\x6a"
+			  "\x40\x67\xbc\xbf\x6b\x3c\xf3\xbb"
+			  "\x51\x5e\x31\xac\x03\x81\xab\x61"
+			  "\xfa\xa5\xa6\x7d\x8b\xc3\x8a\x75"
+			  "\x28\x7a\x71\x9c\xac\x8f\x76\xfc"
+			  "\xf9\x6c\x5d\x9b\xd7\xf6\x36\x2d"
+			  "\x61\xd5\x61\xaa\xdd\x01\xfc\x57"
+			  "\x91\x10\xcd\xcd\x6d\x27\x63\x24"
+			  "\x67\x46\x7a\xbb\x61\x56\x39\xb1"
+			  "\xd6\x79\xfe\x77\xca\xd6\x73\x59"
+			  "\x6e\x58\x11\x90\x03\x26\x74\x2a"
+			  "\xfa\x52\x12\x47\xfb\x12\xeb\x3e"
+			  "\x88\xf0\x52\x6c\xc0\x54\x7a\x88"
+			  "\x8c\xe5\xde\x9e\xba\xb9\xf2\xe1"
+			  "\x97\x2e\x5c\xbd\xf4\x13\x7e\xf3"
+			  "\xc4\xe1\x87\xa5\x35\xfa\x7c\x71"
+			  "\x1a\xc9\xf4\xa8\x57\xe2\x5a\x6b"
+			  "\x14\xe0\x73\xaf\x56\x6b\xa0\x00"
+			  "\x9e\x5f\x64\xac\x00\xfb\xc4\x92"
+			  "\xe5\xe2\x8a\xb2\x9e\x75\x49\x85"
+			  "\x25\x66\xa5\x1a\xf9\x7d\x1d\x60",
+		.klen	= 24,
+		.len	= 512,
+	},
+	{
+		.key	= "\x05\x60\x3a\x7e\x60\x90\x46\x18"
+			  "\x6c\x60\xba\xeb\x12\xd7\xbe\xd1"
+			  "\xd3\xf6\x10\x46\x9d\xf1\x0c\xb4"
+			  "\x73\xe3\x93\x27\xa8\x2c\x13\xaa",
+		.iv	= "\xf5\x96\xd1\xb6\xcb\x44\xd8\xd0"
+			  "\x3e\xdb\x92\x80\x08\x94\xcd\xd3",
+		.ptext	= "\x78",
+		.ctext	= "\xc5",
+		.klen	= 32,
+		.len	= 1,
+	},
+	{
+		.key	= "\x35\xca\x38\xf3\xd9\xd6\x34\xef"
+			  "\xcd\xee\xa3\x26\x86\xba\xfb\x45"
+			  "\x01\xfa\x52\x67\xff\xc5\x9d\xaa"
+			  "\x64\x9a\x05\xbb\x85\x20\xa7\xf2",
+		.iv	= "\xe3\xda\xf5\xff\x42\x59\x87\x86"
+			  "\xee\x7b\xd6\xb4\x6a\x25\x44\xff",
+		.ptext	= "\x44\x67\x1e\x04\x53\xd2\x4b\xd9"
+			  "\x96\x33\x07\x54\xe4\x8e\x20",
+		.ctext	= "\xcc\x55\x40\x79\x47\x5c\x8b\xa6"
+			  "\xca\x7b\x9f\x50\xe3\x21\xea",
+		.klen	= 32,
+		.len	= 15,
+	},
+	{
+		.key	= "\xaf\xd9\x14\x14\xd5\xdb\xc9\xce"
+			  "\x76\x5c\x5a\xbf\x43\x05\x29\x24"
+			  "\xc4\x13\x68\xcc\xe8\x37\xbd\xb9"
+			  "\x41\x20\xf5\x53\x48\xd0\xa2\xd6",
+		.iv	= "\xa7\xb4\x00\x08\x79\x10\xae\xf5"
+			  "\x02\xbf\x85\xb2\x69\x4c\xc6\x04",
+		.ptext	= "\xac\x6a\xa8\x0c\xb0\x84\xbf\x4c"
+			  "\xae\x94\x20\x58\x7e\x00\x93\x89",
+		.ctext	= "\xd5\xaa\xe2\xe9\x86\x4c\x95\x4e"
+			  "\xde\xb6\x15\xcb\xdc\x1f\x13\x38",
+		.klen	= 32,
+		.len	= 16,
+	},
+	{
+		.key	= "\xed\xe3\x8b\xe7\x1c\x17\xbf\x4a"
+			  "\x02\xe2\xfc\x76\xac\xf5\x3c\x00"
+			  "\x5d\xdc\xfc\x83\xeb\x45\xb4\xcb"
+			  "\x59\x62\x60\xec\x69\x9c\x16\x45",
+		.iv	= "\xe4\x0e\x2b\x90\xd2\xfa\x94\x2e"
+			  "\x10\xe5\x64\x2b\x97\x28\x15\xc7",
+		.ptext	= "\xe6\x53\xff\x60\x0e\xc4\x51\xe4"
+			  "\x93\x4d\xe5\x55\xc5\xd9\xad\x48"
+			  "\x52",
+		.ctext	= "\xba\x25\x28\xf5\xcf\x31\x91\x80"
+			  "\xda\x2b\x95\x5f\x20\xcb\xfb\x9f"
+			  "\xc6",
+		.klen	= 32,
+		.len	= 17,
+	},
+	{
+		.key	= "\x77\x5c\xc0\x73\x9a\x64\x97\x91"
+			  "\x2f\xee\xe0\x20\xc2\x04\x59\x2e"
+			  "\x97\xd2\xa7\x70\xb3\xb0\x21\x6b"
+			  "\x8f\xbf\xb8\x51\xa8\xea\x0f\x62",
+		.iv	= "\x31\x8e\x1f\xcd\xfd\x23\xeb\x7f"
+			  "\x8a\x1f\x1b\x23\x53\x27\x44\xe5",
+		.ptext	= "\xcd\xff\x8c\x9b\x94\x5a\x51\x3f"
+			  "\x40\x93\x56\x93\x66\x39\x63\x1f"
+			  "\xbf\xe6\xa4\xfa\xbe\x79\x93\x03"
+			  "\xf5\x66\x74\x16\xfc\xe4\xce",
+		.ctext	= "\x8b\xd3\xc3\xce\x66\xf8\x66\x4c"
+			  "\xad\xd6\xf5\x0f\xd8\x99\x5a\x75"
+			  "\xa1\x3c\xab\x0b\x21\x36\x57\x72"
+			  "\x88\x29\xe9\xea\x4a\x8d\xe9",
+		.klen	= 32,
+		.len	= 31,
+	},
+	{
+		.key	= "\xa1\x2f\x4d\xde\xfe\xa1\xff\xa8"
+			  "\x73\xdd\xe3\xe2\x95\xfc\xea\x9c"
+			  "\xd0\x80\x42\x0c\xb8\x43\x3e\x99"
+			  "\x39\x38\x0a\x8c\xe8\x45\x3a\x7b",
+		.iv	= "\x32\xc4\x6f\xb1\x14\x43\xd1\x87"
+			  "\xe2\x6f\x5a\x58\x02\x36\x7e\x2a",
+		.ptext	= "\x9e\x5c\x1e\xf1\xd6\x7d\x09\x57"
+			  "\x18\x48\x55\xda\x7d\x44\xf9\x6d"
+			  "\xac\xcd\x59\xbb\x10\xa2\x94\x67"
+			  "\xd1\x6f\xfe\x6b\x4a\x11\xe8\x04"
+			  "\x09\x26\x4f\x8d\x5d\xa1\x7b\x42"
+			  "\xf9\x4b\x66\x76\x38\x12\xfe\xfe",
+		.ctext	= "\x42\xbc\xa7\x64\x15\x9a\x04\x71"
+			  "\x2c\x5f\x94\xba\x89\x3a\xad\xbc"
+			  "\x87\xb3\xf4\x09\x4f\x57\x06\x18"
+			  "\xdc\x84\x20\xf7\x64\x85\xca\x3b"
+			  "\xab\xe6\x33\x56\x34\x60\x5d\x4b"
+			  "\x2e\x16\x13\xd4\x77\xde\x2d\x2b",
+		.klen	= 32,
+		.len	= 48,
+	},
+	{
+		.key	= "\xfb\xf5\xb7\x3d\xa6\x95\x42\xbf"
+			  "\xd2\x94\x6c\x74\x0f\xbc\x5a\x28"
+			  "\x35\x3c\x51\x58\x84\xfb\x7d\x11"
+			  "\x16\x1e\x00\x97\x37\x08\xb7\x16",
+		.iv	= "\x9b\x53\x57\x40\xe6\xd9\xa7\x27"
+			  "\x78\xd4\x9b\xd2\x29\x1d\x24\xa9",
+		.ptext	= "\x8b\x02\x60\x0a\x3e\xb7\x10\x59"
+			  "\xc3\xac\xd5\x2a\x75\x81\xf2\xdb"
+			  "\x55\xca\x65\x86\x44\xfb\xfe\x91"
+			  "\x26\xbb\x45\xb2\x46\x22\x3e\x08"
+			  "\xa2\xbf\x46\xcb\x68\x7d\x45\x7b"
+			  "\xa1\x6a\x3c\x6e\x25\xeb\xed\x31"
+			  "\x7a\x8b\x47\xf9\xde\xec\x3d\x87"
+			  "\x09\x20\x2e\xfa\xba\x8b\x9b\xc5"
+			  "\x6c\x25\x9c\x9d\x2a\xe8\xab\x90"
+			  "\x3f\x86\xee\x61\x13\x21\xd4\xde"
+			  "\xe1\x0c\x95\xfc\x5c\x8a\x6e\x0a"
+			  "\x73\xcf\x08\x69\x44\x4e\xde\x25"
+			  "\xaf\xaa\x56\x04\xc4\xb3\x60\x44"
+			  "\x3b\x8b\x3d\xee\xae\x42\x4b\xd2"
+			  "\x9a\x6c\xa0\x8e\x52\x06\xb2\xd1"
+			  "\x5d\x38\x30\x6d\x27\x9b\x1a\xd8",
+		.ctext	= "\xa3\x78\x33\x78\x95\x95\x97\x07"
+			  "\x53\xa3\xa1\x5b\x18\x32\x27\xf7"
+			  "\x09\x12\x53\x70\x83\xb5\x6a\x9f"
+			  "\x26\x6d\x10\x0d\xe0\x1c\xe6\x2b"
+			  "\x70\x00\xdc\xa1\x60\xef\x1b\xee"
+			  "\xc5\xa5\x51\x17\xae\xcc\xf2\xed"
+			  "\xc4\x60\x07\xdf\xd5\x7a\xe9\x90"
+			  "\x3c\x9f\x96\x5d\x72\x65\x5d\xef"
+			  "\xd0\x94\x32\xc4\x85\x90\x78\xa1"
+			  "\x2e\x64\xf6\xee\x8e\x74\x3f\x20"
+			  "\x2f\x12\x3b\x3d\xd5\x39\x8e\x5a"
+			  "\xf9\x8f\xce\x94\x5d\x82\x18\x66"
+			  "\x14\xaf\x4c\xfe\xe0\x91\xc3\x4a"
+			  "\x85\xcf\xe7\xe8\xf7\xcb\xf0\x31"
+			  "\x88\x7d\xc9\x5b\x71\x9d\x5f\xd2"
+			  "\xfa\xed\xa6\x24\xda\xbb\xb1\x84",
+		.klen	= 32,
+		.len	= 128,
+	},
+	{
+		.key	= "\x32\x37\x2b\x8f\x7b\xb1\x23\x79"
+			  "\x05\x52\xde\x05\xf1\x68\x3f\x6c"
+			  "\xa4\xae\xbc\x21\xc2\xc6\xf0\xbd"
+			  "\x0f\x20\xb7\xa4\xc5\x05\x7b\x64",
+		.iv	= "\xff\x26\x4e\x67\x48\xdd\xcf\xfe"
+			  "\x42\x09\x04\x98\x5f\x1e\xfa\x80",
+		.ptext	= "\x99\xdc\x3b\x19\x41\xf9\xff\x6e"
+			  "\x76\xb5\x03\xfa\x61\xed\xf8\x44"
+			  "\x70\xb9\xf0\x83\x80\x6e\x31\x77"
+			  "\x77\xe4\xc7\xb4\x77\x02\xab\x91"
+			  "\x82\xc6\xf8\x7c\x46\x61\x03\x69"
+			  "\x09\xa0\xf7\x12\xb7\x81\x6c\xa9"
+			  "\x10\x5c\xbb\x55\xb3\x44\xed\xb5"
+			  "\xa2\x52\x48\x71\x90\x5d\xda\x40"
+			  "\x0b\x7f\x4a\x11\x6d\xa7\x3d\x8e"
+			  "\x1b\xcd\x9d\x4e\x75\x8b\x7d\x87"
+			  "\xe5\x39\x34\x32\x1e\xe6\x8d\x51"
+			  "\xd4\x1f\xe3\x1d\x50\xa0\x22\x37"
+			  "\x7c\xb0\xd9\xfb\xb6\xb2\x16\xf6"
+			  "\x6d\x26\xa0\x4e\x8c\x6a\xe6\xb6"
+			  "\xbe\x4c\x7c\xe3\x88\x10\x18\x90"
+			  "\x11\x50\x19\x90\xe7\x19\x3f\xd0"
+			  "\x31\x15\x0f\x06\x96\xfe\xa7\x7b"
+			  "\xc3\x32\x88\x69\xa4\x12\xe3\x64"
+			  "\x02\x30\x17\x74\x6c\x88\x7c\x9b"
+			  "\xd6\x6d\x75\xdf\x11\x86\x70\x79"
+			  "\x48\x7d\x34\x3e\x33\x58\x07\x8b"
+			  "\xd2\x50\xac\x35\x15\x45\x05\xb4"
+			  "\x4d\x31\x97\x19\x87\x23\x4b\x87"
+			  "\x53\xdc\xa9\x19\x78\xf1\xbf\x35"
+			  "\x30\x04\x14\xd4\xcf\xb2\x8c\x87"
+			  "\x7d\xdb\x69\xc9\xcd\xfe\x40\x3e"
+			  "\x8d\x66\x5b\x61\xe5\xf0\x2d\x87"
+			  "\x93\x3a\x0c\x2b\x04\x98\x05\xc2"
+			  "\x56\x4d\xc4\x6c\xcd\x7a\x98\x7e"
+			  "\xe2\x2d\x79\x07\x91\x9f\xdf\x2f"
+			  "\x72\xc9\x8f\xcb\x0b\x87\x1b\xb7"
+			  "\x04\x86\xcb\x47\xfa\x5d\x03",
+		.ctext	= "\x0b\x00\xf7\xf2\xc8\x6a\xba\x9a"
+			  "\x0a\x97\x18\x7a\x00\xa0\xdb\xf4"
+			  "\x5e\x8e\x4a\xb7\xe0\x51\xf1\x75"
+			  "\x17\x8b\xb4\xf1\x56\x11\x05\x9f"
+			  "\x2f\x2e\xba\x67\x04\xe1\xb4\xa5"
+			  "\xfc\x7c\x8c\xad\xc6\xb9\xd1\x64"
+			  "\xca\xbd\x5d\xaf\xdb\x65\x48\x4f"
+			  "\x1b\xb3\x94\x5c\x0b\xd0\xee\xcd"
+			  "\xb5\x7f\x43\x8a\xd8\x8b\x66\xde"
+			  "\xd2\x9c\x13\x65\xa4\x47\xa7\x03"
+			  "\xc5\xa1\x46\x8f\x2f\x84\xbc\xef"
+			  "\x48\x9d\x9d\xb5\xbd\x43\xff\xd2"
+			  "\xd2\x7a\x5a\x13\xbf\xb4\xf6\x05"
+			  "\x17\xcd\x01\x12\xf0\x35\x27\x96"
+			  "\xf4\xc1\x65\xf7\x69\xef\x64\x1b"
+			  "\x6e\x4a\xe8\x77\xce\x83\x01\xb7"
+			  "\x60\xe6\x45\x2a\xcd\x41\x4a\xb5"
+			  "\x8e\xcc\x45\x93\xf1\xd6\x64\x5f"
+			  "\x32\x60\xe4\x29\x4a\x82\x6c\x86"
+			  "\x16\xe4\xcc\xdb\x5f\xc8\x11\xa6"
+			  "\xfe\x88\xd6\xc3\xe5\x5c\xbb\x67"
+			  "\xec\xa5\x7b\xf5\xa8\x4f\x77\x25"
+			  "\x5d\x0c\x2a\x99\xf9\xb9\xd1\xae"
+			  "\x3c\x83\x2a\x93\x9b\x66\xec\x68"
+			  "\x2c\x93\x02\x8a\x8a\x1e\x2f\x50"
+			  "\x09\x37\x19\x5c\x2a\x3a\xc2\xcb"
+			  "\xcb\x89\x82\x81\xb7\xbb\xef\x73"
+			  "\x8b\xc9\xae\x42\x96\xef\x70\xc0"
+			  "\x89\xc7\x3e\x6a\x26\xc3\xe4\x39"
+			  "\x53\xa9\xcf\x63\x7d\x05\xf3\xff"
+			  "\x52\x04\xf6\x7f\x23\x96\xe9\xf7"
+			  "\xff\xd6\x50\xa3\x0e\x20\x71",
+		.klen	= 32,
+		.len	= 255,
+	},
+	{
+		.key	= "\x39\x5f\xf4\x9c\x90\x3a\x9a\x25"
+			  "\x15\x11\x79\x39\xed\x26\x5e\xf6"
+			  "\xda\xcf\x33\x4f\x82\x97\xab\x10"
+			  "\xc1\x55\x48\x82\x80\xa8\x02\xb2",
+		.iv	= "\x82\x60\xd9\x06\xeb\x40\x99\x76"
+			  "\x08\xc5\xa4\x83\x45\xb8\x38\x5a",
+		.ptext	= "\xa1\xa8\xac\xac\x08\xaf\x8f\x84"
+			  "\xbf\xcc\x79\x31\x5e\x61\x01\xd1"
+			  "\x4d\x5f\x9b\xcd\x91\x92\x9a\xa1"
+			  "\x99\x0d\x49\xb2\xd7\xfd\x25\x93"
+			  "\x51\x96\xbd\x91\x8b\x08\xf1\xc6"
+			  "\x0d\x17\xf6\xef\xfd\xd2\x78\x16"
+			  "\xc8\x08\x27\x7b\xca\x98\xc6\x12"
+			  "\x86\x11\xdb\xd5\x08\x3d\x5a\x2c"
+			  "\xcf\x15\x0e\x9b\x42\x78\xeb\x1f"
+			  "\x52\xbc\xd7\x5a\x8a\x33\x6c\x14"
+			  "\xfc\x61\xad\x2e\x1e\x03\x66\xea"
+			  "\x79\x0e\x88\x88\xde\x93\xe3\x81"
+			  "\xb5\xc4\x1c\xe6\x9c\x08\x18\x8e"
+			  "\xa0\x87\xda\xe6\xf8\xcb\x30\x44"
+			  "\x2d\x4e\xc0\xa3\x60\xf9\x62\x7b"
+			  "\x4b\xd5\x61\x6d\xe2\x67\x95\x54"
+			  "\x10\xd1\xca\x22\xe8\xb6\xb1\x3a"
+			  "\x2d\xd7\x35\x5b\x22\x88\x55\x67"
+			  "\x3d\x83\x8f\x07\x98\xa8\xf2\xcf"
+			  "\x04\xb7\x9e\x52\xca\xe0\x98\x72"
+			  "\x5c\xc1\x00\xd4\x1f\x2c\x61\xf3"
+			  "\xe8\x40\xaf\x4a\xee\x66\x41\xa0"
+			  "\x02\x77\x29\x30\x65\x59\x4b\x20"
+			  "\x7b\x0d\x80\x97\x27\x7f\xd5\x90"
+			  "\xbb\x9d\x76\x90\xe5\x43\x43\x72"
+			  "\xd0\xd4\x14\x75\x66\xb3\xb6\xaf"
+			  "\x09\xe4\x23\xb0\x62\xad\x17\x28"
+			  "\x39\x26\xab\xf5\xf7\x5c\xb6\x33"
+			  "\xbd\x27\x09\x5b\x29\xe4\x40\x0b"
+			  "\xc1\x26\x32\xdb\x9a\xdf\xf9\x5a"
+			  "\xae\x03\x2c\xa4\x40\x84\x9a\xb7"
+			  "\x4e\x47\xa8\x0f\x23\xc7\xbb\xcf"
+			  "\x2b\xf2\x32\x6c\x35\x6a\x91\xba"
+			  "\x0e\xea\xa2\x8b\x2f\xbd\xb5\xea"
+			  "\x6e\xbc\xb5\x4b\x03\xb3\x86\xe0"
+			  "\x86\xcf\xba\xcb\x38\x2c\x32\xa6"
+			  "\x6d\xe5\x28\xa6\xad\xd2\x7f\x73"
+			  "\x43\x14\xf8\xb1\x99\x12\x2d\x2b"
+			  "\xdf\xcd\xf2\x81\x43\x94\xdf\xb1"
+			  "\x17\xc9\x33\xa6\x3d\xef\x96\xb8"
+			  "\xd6\x0d\x00\xec\x49\x66\x85\x5d"
+			  "\x44\x62\x12\x04\x55\x5c\x48\xd3"
+			  "\xbd\x73\xac\x54\x8f\xbf\x97\x8e"
+			  "\x85\xfd\xc2\xa1\x25\x32\x38\x6a"
+			  "\x1f\xac\x57\x3c\x4f\x56\x73\xf2"
+			  "\x1d\xb6\x48\x68\xc7\x0c\xe7\x60"
+			  "\xd2\x8e\x4d\xfb\xc7\x20\x7b\xb7"
+			  "\x45\x28\x12\xc6\x26\xae\xea\x7c"
+			  "\x5d\xe2\x46\xb5\xae\xe1\xc3\x98"
+			  "\x6f\x72\xd5\xa2\xfd\xed\x40\xfd"
+			  "\xf9\xdf\x61\xec\x45\x2c\x15\xe0"
+			  "\x1e\xbb\xde\x71\x37\x5f\x73\xc2"
+			  "\x11\xcc\x6e\x6d\xe1\xb5\x1b\xd2"
+			  "\x2a\xdd\x19\x8a\xc2\xe1\xa0\xa4"
+			  "\x26\xeb\xb2\x2c\x4f\x77\x52\xf1"
+			  "\x42\x72\x6c\xad\xd7\x78\x5d\x72"
+			  "\xc9\x16\x26\x25\x1b\x4c\xe6\x58"
+			  "\x79\x57\xb5\x06\x15\x4f\xe5\xba"
+			  "\xa2\x7f\x2d\x5b\x87\x8a\x44\x70"
+			  "\xec\xc7\xef\x84\xae\x60\xa2\x61"
+			  "\x86\xe9\x18\xcd\x28\xc4\xa4\xf5"
+			  "\xbc\x84\xb8\x86\xa0\xba\xf1\xf1"
+			  "\x08\x3b\x32\x75\x35\x22\x7a\x65"
+			  "\xca\x48\xe8\xef\x6e\xe2\x8e\x00",
+		.ctext	= "\x2f\xae\xd8\x67\xeb\x15\xde\x75"
+			  "\x53\xa3\x0e\x5a\xcf\x1c\xbe\xea"
+			  "\xde\xf9\xcf\xc2\x9f\xfd\x0f\x44"
+			  "\xc0\xe0\x7a\x76\x1d\xcb\x4a\xf8"
+			  "\x35\xd6\xe3\x95\x98\x6b\x3f\x89"
+			  "\xc4\xe6\xb6\x6f\xe1\x8b\x39\x4b"
+			  "\x1c\x6c\x77\xe4\xe1\x8a\xbc\x61"
+			  "\x00\x6a\xb1\x37\x2f\x45\xe6\x04"
+			  "\x52\x0b\xfc\x1e\x32\xc1\xd8\x9d"
+			  "\xfa\xdd\x67\x5c\xe0\x75\x83\xd0"
+			  "\x21\x9e\x02\xea\xc0\x7f\xc0\x29"
+			  "\xb3\x6c\xa5\x97\xb3\x29\x82\x1a"
+			  "\x94\xa5\xb4\xb6\x49\xe5\xa5\xad"
+			  "\x95\x40\x52\x7c\x84\x88\xa4\xa8"
+			  "\x26\xe4\xd9\x5d\x41\xf2\x93\x7b"
+			  "\xa4\x48\x1b\x66\x91\xb9\x7c\xc2"
+			  "\x99\x29\xdf\xd8\x30\xac\xd4\x47"
+			  "\x42\xa0\x14\x87\x67\xb8\xfd\x0b"
+			  "\x1e\xcb\x5e\x5c\x9a\xc2\x04\x8b"
+			  "\x17\x29\x9d\x99\x7f\x86\x4c\xe2"
+			  "\x5c\x96\xa6\x0f\xb6\x47\x33\x5c"
+			  "\xe4\x50\x49\xd5\x4f\x92\x0b\x9a"
+			  "\xbc\x52\x4c\x41\xf5\xc9\x3e\x76"
+			  "\x55\x55\xd4\xdc\x71\x14\x23\xfc"
+			  "\x5f\xd5\x08\xde\xa0\xf7\x28\xc0"
+			  "\xe1\x61\xac\x64\x66\xf6\xd1\x31"
+			  "\xe4\xa4\xa9\xed\xbc\xad\x4f\x3b"
+			  "\x59\xb9\x48\x1b\xe7\xb1\x6f\xc6"
+			  "\xba\x40\x1c\x0b\xe7\x2f\x31\x65"
+			  "\x85\xf5\xe9\x14\x0a\x31\xf5\xf3"
+			  "\xc0\x1c\x20\x35\x73\x38\x0f\x8e"
+			  "\x39\xf0\x68\xae\x08\x9c\x87\x4b"
+			  "\x42\xfc\x22\x17\xee\x96\x51\x2a"
+			  "\xd8\x57\x5a\x35\xea\x72\x74\xfc"
+			  "\xb3\x0e\x69\x9a\xe1\x4f\x24\x90"
+			  "\xc5\x4b\xe5\xd7\xe3\x82\x2f\xc5"
+			  "\x62\x46\x3e\xab\x72\x4e\xe0\xf3"
+			  "\x90\x09\x4c\xb2\xe1\xe8\xa0\xf5"
+			  "\x46\x40\x2b\x47\x85\x3c\x21\x90"
+			  "\x3d\xad\x25\x5a\x36\xdf\xe5\xbc"
+			  "\x7e\x80\x4d\x53\x77\xf1\x79\xa6"
+			  "\xec\x22\x80\x88\x68\xd6\x2d\x8b"
+			  "\x3e\xf7\x52\xc7\x2a\x20\x42\x5c"
+			  "\xed\x99\x4f\x32\x80\x00\x7e\x73"
+			  "\xd7\x6d\x7f\x7d\x42\x54\x4a\xfe"
+			  "\xff\x6f\x61\xca\x2a\xbb\x4f\xeb"
+			  "\x4f\xe4\x4e\xaf\x2c\x4f\x82\xcd"
+			  "\xa1\xa7\x11\xb3\x34\x33\xcf\x32"
+			  "\x63\x0e\x24\x3a\x35\xbe\x06\xd5"
+			  "\x17\xcb\x02\x30\x33\x6e\x8c\x49"
+			  "\x40\x6e\x34\x8c\x07\xd4\x3e\xe6"
+			  "\xaf\x78\x6d\x8c\x10\x5f\x21\x58"
+			  "\x49\x26\xc5\xaf\x0d\x7d\xd4\xaf"
+			  "\xcd\x5b\xa1\xe3\xf6\x39\x1c\x9b"
+			  "\x8e\x00\xa1\xa7\x9e\x17\x4a\xc0"
+			  "\x54\x56\x9e\xcf\xcf\x88\x79\x8d"
+			  "\x50\xf7\x56\x8e\x0a\x73\x46\x6b"
+			  "\xc3\xb9\x9b\x6c\x7d\xc4\xc8\xb6"
+			  "\x03\x5f\x30\x62\x7d\xe6\xdb\x15"
+			  "\xe1\x39\x02\x8c\xff\xda\xc8\x43"
+			  "\xf2\xa9\xbf\x00\xe7\x3a\x61\x89"
+			  "\xdf\xb0\xca\x7d\x8c\x8a\x6a\x9f"
+			  "\x18\x89\x3d\x39\xac\x36\x6f\x05"
+			  "\x1f\xb5\xda\x00\xea\xe1\x51\x21",
+		.klen	= 32,
+		.len	= 512,
+	},
 
-static const struct hash_testvec blakes2s_160_tv_template[] = {{
-	.plaintext = blake2_ordered_sequence,
-	.psize = 7,
-	.digest = (u8[]){ 0xb4, 0xf2, 0x03, 0x49, 0x37, 0xed, 0xb1, 0x3e,
-			  0x5b, 0x2a, 0xca, 0x64, 0x82, 0x74, 0xf6, 0x62,
-			  0xe3, 0xf2, 0x84, 0xff, },
-}, {
-	.plaintext = blake2_ordered_sequence,
-	.psize = 256,
-	.digest = (u8[]){ 0xaa, 0x56, 0x9b, 0xdc, 0x98, 0x17, 0x75, 0xf2,
-			  0xb3, 0x68, 0x83, 0xb7, 0x9b, 0x8d, 0x48, 0xb1,
-			  0x9b, 0x2d, 0x35, 0x05, },
-}, {
-	.ksize = 1,
-	.key = "B",
-	.digest = (u8[]){ 0x50, 0x16, 0xe7, 0x0c, 0x01, 0xd0, 0xd3, 0xc3,
-			  0xf4, 0x3e, 0xb1, 0x6e, 0x97, 0xa9, 0x4e, 0xd1,
-			  0x79, 0x65, 0x32, 0x93, },
-}, {
-	.ksize = 32,
-	.key = blake2_ordered_sequence,
-	.plaintext = blake2_ordered_sequence,
-	.psize = 1,
-	.digest = (u8[]){ 0x1c, 0x2b, 0xcd, 0x9a, 0x68, 0xca, 0x8c, 0x71,
-			  0x90, 0x29, 0x6c, 0x54, 0xfa, 0x56, 0x4a, 0xef,
-			  0xa2, 0x3a, 0x56, 0x9c, },
-}, {
-	.ksize = 16,
-	.key = blake2_ordered_sequence,
-	.plaintext = blake2_ordered_sequence,
-	.psize = 15,
-	.digest = (u8[]){ 0x36, 0xc3, 0x5f, 0x9a, 0xdc, 0x7e, 0xbf, 0x19,
-			  0x68, 0xaa, 0xca, 0xd8, 0x81, 0xbf, 0x09, 0x34,
-			  0x83, 0x39, 0x0f, 0x30, },
-}, {
-	.ksize = 1,
-	.key = "B",
-	.plaintext = blake2_ordered_sequence,
-	.psize = 64,
-	.digest = (u8[]){ 0x86, 0x80, 0x78, 0xa4, 0x14, 0xec, 0x03, 0xe5,
-			  0xb6, 0x9a, 0x52, 0x0e, 0x42, 0xee, 0x39, 0x9d,
-			  0xac, 0xa6, 0x81, 0x63, },
-}, {
-	.ksize = 32,
-	.key = blake2_ordered_sequence,
-	.plaintext = blake2_ordered_sequence,
-	.psize = 247,
-	.digest = (u8[]){ 0x2d, 0xd8, 0xd2, 0x53, 0x66, 0xfa, 0xa9, 0x01,
-			  0x1c, 0x9c, 0xaf, 0xa3, 0xe2, 0x9d, 0x9b, 0x10,
-			  0x0a, 0xf6, 0x73, 0xe8, },
-}};
+};
 
-static const struct hash_testvec blakes2s_224_tv_template[] = {{
-	.plaintext = blake2_ordered_sequence,
-	.psize = 1,
-	.digest = (u8[]){ 0x61, 0xb9, 0x4e, 0xc9, 0x46, 0x22, 0xa3, 0x91,
-			  0xd2, 0xae, 0x42, 0xe6, 0x45, 0x6c, 0x90, 0x12,
-			  0xd5, 0x80, 0x07, 0x97, 0xb8, 0x86, 0x5a, 0xfc,
-			  0x48, 0x21, 0x97, 0xbb, },
-}, {
-	.plaintext = blake2_ordered_sequence,
-	.psize = 247,
-	.digest = (u8[]){ 0x9e, 0xda, 0xc7, 0x20, 0x2c, 0xd8, 0x48, 0x2e,
-			  0x31, 0x94, 0xab, 0x46, 0x6d, 0x94, 0xd8, 0xb4,
-			  0x69, 0xcd, 0xae, 0x19, 0x6d, 0x9e, 0x41, 0xcc,
-			  0x2b, 0xa4, 0xd5, 0xf6, },
-}, {
-	.ksize = 16,
-	.key = blake2_ordered_sequence,
-	.digest = (u8[]){ 0x32, 0xc0, 0xac, 0xf4, 0x3b, 0xd3, 0x07, 0x9f,
-			  0xbe, 0xfb, 0xfa, 0x4d, 0x6b, 0x4e, 0x56, 0xb3,
-			  0xaa, 0xd3, 0x27, 0xf6, 0x14, 0xbf, 0xb9, 0x32,
-			  0xa7, 0x19, 0xfc, 0xb8, },
-}, {
-	.ksize = 1,
-	.key = "B",
-	.plaintext = blake2_ordered_sequence,
-	.psize = 7,
-	.digest = (u8[]){ 0x73, 0xad, 0x5e, 0x6d, 0xb9, 0x02, 0x8e, 0x76,
-			  0xf2, 0x66, 0x42, 0x4b, 0x4c, 0xfa, 0x1f, 0xe6,
-			  0x2e, 0x56, 0x40, 0xe5, 0xa2, 0xb0, 0x3c, 0xe8,
-			  0x7b, 0x45, 0xfe, 0x05, },
-}, {
-	.ksize = 32,
-	.key = blake2_ordered_sequence,
-	.plaintext = blake2_ordered_sequence,
-	.psize = 15,
-	.digest = (u8[]){ 0x16, 0x60, 0xfb, 0x92, 0x54, 0xb3, 0x6e, 0x36,
-			  0x81, 0xf4, 0x16, 0x41, 0xc3, 0x3d, 0xd3, 0x43,
-			  0x84, 0xed, 0x10, 0x6f, 0x65, 0x80, 0x7a, 0x3e,
-			  0x25, 0xab, 0xc5, 0x02, },
-}, {
-	.ksize = 16,
-	.key = blake2_ordered_sequence,
-	.plaintext = blake2_ordered_sequence,
-	.psize = 64,
-	.digest = (u8[]){ 0xca, 0xaa, 0x39, 0x67, 0x9c, 0xf7, 0x6b, 0xc7,
-			  0xb6, 0x82, 0xca, 0x0e, 0x65, 0x36, 0x5b, 0x7c,
-			  0x24, 0x00, 0xfa, 0x5f, 0xda, 0x06, 0x91, 0x93,
-			  0x6a, 0x31, 0x83, 0xb5, },
-}, {
-	.ksize = 1,
-	.key = "B",
-	.plaintext = blake2_ordered_sequence,
-	.psize = 256,
-	.digest = (u8[]){ 0x90, 0x02, 0x26, 0xb5, 0x06, 0x9c, 0x36, 0x86,
-			  0x94, 0x91, 0x90, 0x1e, 0x7d, 0x2a, 0x71, 0xb2,
-			  0x48, 0xb5, 0xe8, 0x16, 0xfd, 0x64, 0x33, 0x45,
-			  0xb3, 0xd7, 0xec, 0xcc, },
-}};
+/*
+ * Test vectors generated using https://github.com/google/hctr2
+ *
+ * To ensure compatibility with RFC 8452, some tests were sourced from
+ * https://datatracker.ietf.org/doc/html/rfc8452
+ */
+static const struct hash_testvec polyval_tv_template[] = {
+	{ // From RFC 8452
+		.key	= "\x31\x07\x28\xd9\x91\x1f\x1f\x38"
+			  "\x37\xb2\x43\x16\xc3\xfa\xb9\xa0",
+		.plaintext	= "\x65\x78\x61\x6d\x70\x6c\x65\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x48\x65\x6c\x6c\x6f\x20\x77\x6f"
+			  "\x72\x6c\x64\x00\x00\x00\x00\x00"
+			  "\x38\x00\x00\x00\x00\x00\x00\x00"
+			  "\x58\x00\x00\x00\x00\x00\x00\x00",
+		.digest	= "\xad\x7f\xcf\x0b\x51\x69\x85\x16"
+			  "\x62\x67\x2f\x3c\x5f\x95\x13\x8f",
+		.psize	= 48,
+		.ksize	= 16,
+	},
+	{ // From RFC 8452
+		.key	= "\xd9\xb3\x60\x27\x96\x94\x94\x1a"
+			  "\xc5\xdb\xc6\x98\x7a\xda\x73\x77",
+		.plaintext	= "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00",
+		.digest	= "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00",
+		.psize	= 16,
+		.ksize	= 16,
+	},
+	{ // From RFC 8452
+		.key	= "\xd9\xb3\x60\x27\x96\x94\x94\x1a"
+			  "\xc5\xdb\xc6\x98\x7a\xda\x73\x77",
+		.plaintext	= "\x01\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x40\x00\x00\x00\x00\x00\x00\x00",
+		.digest	= "\xeb\x93\xb7\x74\x09\x62\xc5\xe4"
+			  "\x9d\x2a\x90\xa7\xdc\x5c\xec\x74",
+		.psize	= 32,
+		.ksize	= 16,
+	},
+	{ // From RFC 8452
+		.key	= "\xd9\xb3\x60\x27\x96\x94\x94\x1a"
+			  "\xc5\xdb\xc6\x98\x7a\xda\x73\x77",
+		.plaintext	= "\x01\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x02\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x03\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x80\x01\x00\x00\x00\x00\x00\x00",
+		.digest	= "\x81\x38\x87\x46\xbc\x22\xd2\x6b"
+			  "\x2a\xbc\x3d\xcb\x15\x75\x42\x22",
+		.psize	= 64,
+		.ksize	= 16,
+	},
+	{ // From RFC 8452
+		.key	= "\xd9\xb3\x60\x27\x96\x94\x94\x1a"
+			  "\xc5\xdb\xc6\x98\x7a\xda\x73\x77",
+		.plaintext	= "\x01\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x02\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x03\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x04\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x02\x00\x00\x00\x00\x00\x00",
+		.digest	= "\x1e\x39\xb6\xd3\x34\x4d\x34\x8f"
+			  "\x60\x44\xf8\x99\x35\xd1\xcf\x78",
+		.psize	= 80,
+		.ksize	= 16,
+	},
+	{ // From RFC 8452
+		.key	= "\xd9\xb3\x60\x27\x96\x94\x94\x1a"
+			  "\xc5\xdb\xc6\x98\x7a\xda\x73\x77",
+		.plaintext	= "\x01\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x02\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x03\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x04\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x05\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x08\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x02\x00\x00\x00\x00\x00\x00",
+		.digest	= "\xff\xcd\x05\xd5\x77\x0f\x34\xad"
+			  "\x92\x67\xf0\xa5\x99\x94\xb1\x5a",
+		.psize	= 96,
+		.ksize	= 16,
+	},
+	{ // Random ( 1)
+		.key	= "\x90\xcc\xac\xee\xba\xd7\xd4\x68"
+			  "\x98\xa6\x79\x70\xdf\x66\x15\x6c",
+		.plaintext	= "",
+		.digest	= "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00",
+		.psize	= 0,
+		.ksize	= 16,
+	},
+	{ // Random ( 1)
+		.key	= "\xc1\x45\x71\xf0\x30\x07\x94\xe7"
+			  "\x3a\xdd\xe4\xc6\x19\x2d\x02\xa2",
+		.plaintext	= "\xc1\x5d\x47\xc7\x4c\x7c\x5e\x07"
+			  "\x85\x14\x8f\x79\xcc\x73\x83\xf7"
+			  "\x35\xb8\xcb\x73\x61\xf0\x53\x31"
+			  "\xbf\x84\xde\xb6\xde\xaf\xb0\xb8"
+			  "\xb7\xd9\x11\x91\x89\xfd\x1e\x4c"
+			  "\x84\x4a\x1f\x2a\x87\xa4\xaf\x62"
+			  "\x8d\x7d\x58\xf6\x43\x35\xfc\x53"
+			  "\x8f\x1a\xf6\x12\xe1\x13\x3f\x66"
+			  "\x91\x4b\x13\xd6\x45\xfb\xb0\x7a"
+			  "\xe0\x8b\x8e\x99\xf7\x86\x46\x37"
+			  "\xd1\x22\x9e\x52\xf3\x3f\xd9\x75"
+			  "\x2c\x2c\xc6\xbb\x0e\x08\x14\x29"
+			  "\xe8\x50\x2f\xd8\xbe\xf4\xe9\x69"
+			  "\x4a\xee\xf7\xae\x15\x65\x35\x1e",
+		.digest	= "\x00\x4f\x5d\xe9\x3b\xc0\xd6\x50"
+			  "\x3e\x38\x73\x86\xc6\xda\xca\x7f",
+		.psize	= 112,
+		.ksize	= 16,
+	},
+	{ // Random ( 1)
+		.key	= "\x37\xbe\x68\x16\x50\xb9\x4e\xb0"
+			  "\x47\xde\xe2\xbd\xde\xe4\x48\x09",
+		.plaintext	= "\x87\xfc\x68\x9f\xff\xf2\x4a\x1e"
+			  "\x82\x3b\x73\x8f\xc1\xb2\x1b\x7a"
+			  "\x6c\x4f\x81\xbc\x88\x9b\x6c\xa3"
+			  "\x9c\xc2\xa5\xbc\x14\x70\x4c\x9b"
+			  "\x0c\x9f\x59\x92\x16\x4b\x91\x3d"
+			  "\x18\x55\x22\x68\x12\x8c\x63\xb2"
+			  "\x51\xcb\x85\x4b\xd2\xae\x0b\x1c"
+			  "\x5d\x28\x9d\x1d\xb1\xc8\xf0\x77"
+			  "\xe9\xb5\x07\x4e\x06\xc8\xee\xf8"
+			  "\x1b\xed\x72\x2a\x55\x7d\x16\xc9"
+			  "\xf2\x54\xe7\xe9\xe0\x44\x5b\x33"
+			  "\xb1\x49\xee\xff\x43\xfb\x82\xcd"
+			  "\x4a\x70\x78\x81\xa4\x34\x36\xe8"
+			  "\x4c\x28\x54\xa6\x6c\xc3\x6b\x78"
+			  "\xe7\xc0\x5d\xc6\x5d\x81\xab\x70"
+			  "\x08\x86\xa1\xfd\xf4\x77\x55\xfd"
+			  "\xa3\xe9\xe2\x1b\xdf\x99\xb7\x80"
+			  "\xf9\x0a\x4f\x72\x4a\xd3\xaf\xbb"
+			  "\xb3\x3b\xeb\x08\x58\x0f\x79\xce"
+			  "\xa5\x99\x05\x12\x34\xd4\xf4\x86"
+			  "\x37\x23\x1d\xc8\x49\xc0\x92\xae"
+			  "\xa6\xac\x9b\x31\x55\xed\x15\xc6"
+			  "\x05\x17\x37\x8d\x90\x42\xe4\x87"
+			  "\x89\x62\x88\x69\x1c\x6a\xfd\xe3"
+			  "\x00\x2b\x47\x1a\x73\xc1\x51\xc2"
+			  "\xc0\x62\x74\x6a\x9e\xb2\xe5\x21"
+			  "\xbe\x90\xb5\xb0\x50\xca\x88\x68"
+			  "\xe1\x9d\x7a\xdf\x6c\xb7\xb9\x98"
+			  "\xee\x28\x62\x61\x8b\xd1\x47\xf9"
+			  "\x04\x7a\x0b\x5d\xcd\x2b\x65\xf5"
+			  "\x12\xa3\xfe\x1a\xaa\x2c\x78\x42"
+			  "\xb8\xbe\x7d\x74\xeb\x59\xba\xba",
+		.digest	= "\xae\x11\xd4\x60\x2a\x5f\x9e\x42"
+			  "\x89\x04\xc2\x34\x8d\x55\x94\x0a",
+		.psize	= 256,
+		.ksize	= 16,
+	},
 
-static const struct hash_testvec blakes2s_256_tv_template[] = {{
-	.plaintext = blake2_ordered_sequence,
-	.psize = 15,
-	.digest = (u8[]){ 0xd9, 0x7c, 0x82, 0x8d, 0x81, 0x82, 0xa7, 0x21,
-			  0x80, 0xa0, 0x6a, 0x78, 0x26, 0x83, 0x30, 0x67,
-			  0x3f, 0x7c, 0x4e, 0x06, 0x35, 0x94, 0x7c, 0x04,
-			  0xc0, 0x23, 0x23, 0xfd, 0x45, 0xc0, 0xa5, 0x2d, },
-}, {
-	.ksize = 32,
-	.key = blake2_ordered_sequence,
-	.digest = (u8[]){ 0x48, 0xa8, 0x99, 0x7d, 0xa4, 0x07, 0x87, 0x6b,
-			  0x3d, 0x79, 0xc0, 0xd9, 0x23, 0x25, 0xad, 0x3b,
-			  0x89, 0xcb, 0xb7, 0x54, 0xd8, 0x6a, 0xb7, 0x1a,
-			  0xee, 0x04, 0x7a, 0xd3, 0x45, 0xfd, 0x2c, 0x49, },
-}, {
-	.ksize = 1,
-	.key = "B",
-	.plaintext = blake2_ordered_sequence,
-	.psize = 1,
-	.digest = (u8[]){ 0x22, 0x27, 0xae, 0xaa, 0x6e, 0x81, 0x56, 0x03,
-			  0xa7, 0xe3, 0xa1, 0x18, 0xa5, 0x9a, 0x2c, 0x18,
-			  0xf4, 0x63, 0xbc, 0x16, 0x70, 0xf1, 0xe7, 0x4b,
-			  0x00, 0x6d, 0x66, 0x16, 0xae, 0x9e, 0x74, 0x4e, },
-}, {
-	.ksize = 16,
-	.key = blake2_ordered_sequence,
-	.plaintext = blake2_ordered_sequence,
-	.psize = 7,
-	.digest = (u8[]){ 0x58, 0x5d, 0xa8, 0x60, 0x1c, 0xa4, 0xd8, 0x03,
-			  0x86, 0x86, 0x84, 0x64, 0xd7, 0xa0, 0x8e, 0x15,
-			  0x2f, 0x05, 0xa2, 0x1b, 0xbc, 0xef, 0x7a, 0x34,
-			  0xb3, 0xc5, 0xbc, 0x4b, 0xf0, 0x32, 0xeb, 0x12, },
-}, {
-	.ksize = 32,
-	.key = blake2_ordered_sequence,
-	.plaintext = blake2_ordered_sequence,
-	.psize = 64,
-	.digest = (u8[]){ 0x89, 0x75, 0xb0, 0x57, 0x7f, 0xd3, 0x55, 0x66,
-			  0xd7, 0x50, 0xb3, 0x62, 0xb0, 0x89, 0x7a, 0x26,
-			  0xc3, 0x99, 0x13, 0x6d, 0xf0, 0x7b, 0xab, 0xab,
-			  0xbd, 0xe6, 0x20, 0x3f, 0xf2, 0x95, 0x4e, 0xd4, },
-}, {
-	.ksize = 1,
-	.key = "B",
-	.plaintext = blake2_ordered_sequence,
-	.psize = 247,
-	.digest = (u8[]){ 0x2e, 0x74, 0x1c, 0x1d, 0x03, 0xf4, 0x9d, 0x84,
-			  0x6f, 0xfc, 0x86, 0x32, 0x92, 0x49, 0x7e, 0x66,
-			  0xd7, 0xc3, 0x10, 0x88, 0xfe, 0x28, 0xb3, 0xe0,
-			  0xbf, 0x50, 0x75, 0xad, 0x8e, 0xa4, 0xe6, 0xb2, },
-}, {
-	.ksize = 16,
-	.key = blake2_ordered_sequence,
-	.plaintext = blake2_ordered_sequence,
-	.psize = 256,
-	.digest = (u8[]){ 0xb9, 0xd2, 0x81, 0x0e, 0x3a, 0xb1, 0x62, 0x9b,
-			  0xad, 0x44, 0x05, 0xf4, 0x92, 0x2e, 0x99, 0xc1,
-			  0x4a, 0x47, 0xbb, 0x5b, 0x6f, 0xb2, 0x96, 0xed,
-			  0xd5, 0x06, 0xb5, 0x3a, 0x7c, 0x7a, 0x65, 0x1d, },
-}};
+};
+
+/*
+ * Test vectors generated using https://github.com/google/hctr2
+ */
+static const struct cipher_testvec aes_hctr2_tv_template[] = {
+	{
+		.key	= "\xe1\x15\x66\x3c\x8d\xc6\x3a\xff"
+			  "\xef\x41\xd7\x47\xa2\xcc\x8a\xba",
+		.iv	= "\xc3\xbe\x2a\xcb\xb5\x39\x86\xf1"
+			  "\x91\xad\x6c\xf4\xde\x74\x45\x63"
+			  "\x5c\x7a\xd5\xcc\x8b\x76\xef\x0e"
+			  "\xcf\x2c\x60\x69\x37\xfd\x07\x96",
+		.ptext	= "\x65\x75\xae\xd3\xe2\xbc\x43\x5c"
+			  "\xb3\x1a\xd8\x05\xc3\xd0\x56\x29",
+		.ctext	= "\x11\x91\xea\x74\x58\xcc\xd5\xa2"
+			  "\xd0\x55\x9e\x3d\xfe\x7f\xc8\xfe",
+		.klen	= 16,
+		.len	= 16,
+	},
+	{
+		.key	= "\xe7\xd1\x77\x48\x76\x0b\xcd\x34"
+			  "\x2a\x2d\xe7\x74\xca\x11\x9c\xae",
+		.iv	= "\x71\x1c\x49\x62\xd9\x5b\x50\x5e"
+			  "\x68\x87\xbc\xf6\x89\xff\xed\x30"
+			  "\xe4\xe5\xbd\xb6\x10\x4f\x9f\x66"
+			  "\x28\x06\x5a\xf4\x27\x35\xcd\xe5",
+		.ptext	= "\x87\x03\x8f\x06\xa8\x61\x54\xda"
+			  "\x01\x45\xd4\x01\xef\x4a\x22\xcf"
+			  "\x78\x15\x9f\xbd\x64\xbd\x2c\xb9"
+			  "\x40\x1d\x72\xae\x53\x63\xa5",
+		.ctext	= "\x4e\xa1\x05\x27\xb8\x45\xe4\xa1"
+			  "\xbb\x30\xb4\xa6\x12\x74\x63\xd6"
+			  "\x17\xc9\xcc\x2f\x18\x64\xe0\x06"
+			  "\x0a\xa0\xff\x72\x10\x7b\x22",
+		.klen	= 16,
+		.len	= 31,
+	},
+	{
+		.key	= "\x59\x65\x3b\x1d\x43\x5e\xc0\xae"
+			  "\xb8\x9d\x9b\xdd\x22\x03\xbf\xca",
+		.iv	= "\xec\x95\xfa\x5a\xcf\x5e\xd2\x93"
+			  "\xa3\xb5\xe5\xbe\xf3\x01\x7b\x01"
+			  "\xd1\xca\x6c\x06\x82\xf0\xbd\x67"
+			  "\xd9\x6c\xa4\xdc\xb4\x38\x0f\x74",
+		.ptext	= "\x45\xdf\x75\x87\xbc\x72\xce\x55"
+			  "\xc9\xfa\xcb\xfc\x9f\x40\x82\x2b"
+			  "\xc6\x4f\x4f\x5b\x8b\x3b\x6d\x67"
+			  "\xa6\x93\x62\x89\x8c\x19\xf4\xe3"
+			  "\x08\x92\x9c\xc9\x47\x2c\x6e\xd0"
+			  "\xa3\x02\x2b\xdb\x2c\xf2\x8d\x46"
+			  "\xcd\xb0\x9d\x26\x63\x4c\x40\x6b"
+			  "\x79\x43\xe5\xce\x42\xa8\xec\x3b"
+			  "\x5b\xd0\xea\xa4\xe6\xdb\x66\x55"
+			  "\x7a\x76\xec\xab\x7d\x2a\x2b\xbd"
+			  "\xa9\xab\x22\x64\x1a\xa1\xae\x84"
+			  "\x86\x79\x67\xe9\xb2\x50\xbe\x12"
+			  "\x2f\xb2\x14\xf0\xdb\x71\xd8\xa7"
+			  "\x41\x8a\x88\xa0\x6a\x6e\x9d\x2a"
+			  "\xfa\x11\x37\x40\x32\x09\x4c\x47"
+			  "\x41\x07\x31\x85\x3d\xa8\xf7\x64",
+		.ctext	= "\x2d\x4b\x9f\x93\xca\x5a\x48\x26"
+			  "\x01\xcc\x54\xe4\x31\x50\x12\xf0"
+			  "\x49\xff\x59\x42\x68\xbd\x87\x8f"
+			  "\x9e\x62\x96\xcd\xb9\x24\x57\xa4"
+			  "\x0b\x7b\xf5\x2e\x0e\xa8\x65\x07"
+			  "\xab\x05\xd5\xca\xe7\x9c\x6c\x34"
+			  "\x5d\x42\x34\xa4\x62\xe9\x75\x48"
+			  "\x3d\x9e\x8f\xfa\x42\xe9\x75\x08"
+			  "\x4e\x54\x91\x2b\xbd\x11\x0f\x8e"
+			  "\xf0\x82\xf5\x24\xf1\xc4\xfc\xae"
+			  "\x42\x54\x7f\xce\x15\xa8\xb2\x33"
+			  "\xc0\x86\xb6\x2b\xe8\x44\xce\x1f"
+			  "\x68\x57\x66\x94\x6e\xad\xeb\xf3"
+			  "\x30\xf8\x11\xbd\x60\x00\xc6\xd5"
+			  "\x4c\x81\xf1\x20\x2b\x4a\x5b\x99"
+			  "\x79\x3b\xc9\x5c\x74\x23\xe6\x5d",
+		.klen	= 16,
+		.len	= 128,
+	},
+	{
+		.key	= "\x3e\x08\x5d\x64\x6c\x98\xec\xec"
+			  "\x70\x0e\x0d\xa1\x41\x20\x99\x82",
+		.iv	= "\x11\xb7\x77\x91\x0d\x99\xd9\x8d"
+			  "\x35\x3a\xf7\x14\x6b\x09\x37\xe5"
+			  "\xad\x51\xf6\xc3\x96\x4b\x64\x56"
+			  "\xa8\xbd\x81\xcc\xbe\x94\xaf\xe4",
+		.ptext	= "\xff\x8d\xb9\xc0\xe3\x69\xb3\xb2"
+			  "\x8b\x11\x26\xb3\x11\xec\xfb\xb9"
+			  "\x9c\xc1\x71\xd6\xe3\x26\x0e\xe0"
+			  "\x68\x40\x60\xb9\x3a\x63\x56\x8a"
+			  "\x9e\xc1\xf0\x10\xb1\x64\x32\x70"
+			  "\xf8\xcd\xc6\xc4\x49\x4c\xe1\xce"
+			  "\xf3\xe1\x03\xf8\x35\xae\xe0\x5e"
+			  "\xef\x5f\xbc\x41\x75\x26\x13\xcc"
+			  "\x37\x85\xdf\xc0\x5d\xa6\x47\x98"
+			  "\xf1\x97\x52\x58\x04\xe6\xb5\x01"
+			  "\xc0\xb8\x17\x6d\x74\xbd\x9a\xdf"
+			  "\xa4\x37\x94\x86\xb0\x13\x83\x28"
+			  "\xc9\xa2\x07\x3f\xb5\xb2\x72\x40"
+			  "\x0e\x60\xdf\x57\x07\xb7\x2c\x66"
+			  "\x10\x3f\x8d\xdd\x30\x0a\x47\xd5"
+			  "\xe8\x9d\xfb\xa1\xaf\x53\xd7\x05"
+			  "\xc7\xd2\xba\xe7\x2c\xa0\xbf\xb8"
+			  "\xd1\x93\xe7\x41\x82\xa3\x41\x3a"
+			  "\xaf\x12\xd6\xf8\x34\xda\x92\x46"
+			  "\xad\xa2\x2f\xf6\x7e\x46\x96\xd8"
+			  "\x03\xf3\x49\x64\xde\xd8\x06\x8b"
+			  "\xa0\xbc\x63\x35\x38\xb6\x6b\xda"
+			  "\x5b\x50\x3f\x13\xa5\x84\x1b\x1b"
+			  "\x66\x89\x95\xb7\xc2\x16\x3c\xe9"
+			  "\x24\xb0\x8c\x6f\x49\xef\xf7\x28"
+			  "\x6a\x24\xfd\xbe\x25\xe2\xb4\x90"
+			  "\x77\x44\x08\xb8\xda\xd2\xde\x2c"
+			  "\xa0\x57\x45\x57\x29\x47\x6b\x89"
+			  "\x4a\xf6\xa7\x2a\xc3\x9e\x7b\xc8"
+			  "\xfd\x9f\x89\xab\xee\x6d\xa3\xb4"
+			  "\x23\x90\x7a\xe9\x89\xa0\xc7\xb3"
+			  "\x17\x41\x87\x91\xfc\x97\x42",
+		.ctext	= "\xfc\x9b\x96\x66\xc4\x82\x2a\x4a"
+			  "\xb1\x24\xba\xc7\x78\x5f\x79\xc1"
+			  "\x57\x2e\x47\x29\x4d\x7b\xd2\x9a"
+			  "\xbd\xc6\xc1\x26\x7b\x8e\x3f\x5d"
+			  "\xd4\xb4\x9f\x6a\x02\x24\x4a\xad"
+			  "\x0c\x00\x1b\xdf\x92\xc5\x8a\xe1"
+			  "\x77\x79\xcc\xd5\x20\xbf\x83\xf4"
+			  "\x4b\xad\x11\xbf\xdb\x47\x65\x70"
+			  "\x43\xf3\x65\xdf\xb7\xdc\xb2\xb9"
+			  "\xaa\x3f\xb3\xdf\x79\x69\x0d\xa0"
+			  "\x86\x1c\xba\x48\x0b\x01\xc1\x88"
+			  "\xdf\x03\xb1\x06\x3c\x1d\x56\xa1"
+			  "\x8e\x98\xc1\xa6\x95\xa2\x5b\x72"
+			  "\x76\x59\xd2\x26\x25\xcd\xef\x7c"
+			  "\xc9\x60\xea\x43\xd1\x12\x8a\x8a"
+			  "\x63\x12\x78\xcb\x2f\x88\x1e\x88"
+			  "\x78\x59\xde\xba\x4d\x2c\x78\x61"
+			  "\x75\x37\x54\xfd\x80\xc7\x5e\x98"
+			  "\xcf\x14\x62\x8e\xfb\x72\xee\x4d"
+			  "\x9f\xaf\x8b\x09\xe5\x21\x0a\x91"
+			  "\x8f\x88\x87\xd5\xb1\x84\xab\x18"
+			  "\x08\x57\xed\x72\x35\xa6\x0e\xc6"
+			  "\xff\xcb\xfe\x2c\x48\x39\x14\x44"
+			  "\xba\x59\x32\x3a\x2d\xc4\x5f\xcb"
+			  "\xbe\x68\x8e\x7b\xee\x21\xa4\x32"
+			  "\x11\xa0\x99\xfd\x90\xde\x59\x43"
+			  "\xeb\xed\xd5\x87\x68\x46\xc6\xde"
+			  "\x0b\x07\x17\x59\x6a\xab\xca\x15"
+			  "\x65\x02\x01\xb6\x71\x8c\x3b\xaa"
+			  "\x18\x3b\x30\xae\x38\x5b\x2c\x74"
+			  "\xd4\xee\x4a\xfc\xf7\x1b\x09\xd4"
+			  "\xda\x8b\x1d\x5d\x6f\x21\x6c",
+		.klen	= 16,
+		.len	= 255,
+	},
+	{
+		.key	= "\x24\xf6\xe1\x62\xe5\xaf\x99\xda"
+			  "\x84\xec\x41\xb0\xa3\x0b\xd5\xa8"
+			  "\xa0\x3e\x7b\xa6\xdd\x6c\x8f\xa8",
+		.iv	= "\x7f\x80\x24\x62\x32\xdd\xab\x66"
+			  "\xf2\x87\x29\x24\xec\xd2\x4b\x9f"
+			  "\x0c\x33\x52\xd9\xe0\xcc\x6e\xe4"
+			  "\x90\x85\x43\x97\xc4\x62\x14\x33",
+		.ptext	= "\xef\x58\xe7\x7f\xa9\xd9\xb8\xd7"
+			  "\xa2\x91\x97\x07\x27\x9e\xba\xe8"
+			  "\xaa",
+		.ctext	= "\xd7\xc3\x81\x91\xf2\x40\x17\x73"
+			  "\x3e\x3b\x1c\x2a\x8e\x11\x9c\x17"
+			  "\xf1",
+		.klen	= 24,
+		.len	= 17,
+	},
+	{
+		.key	= "\xbf\xaf\xd7\x67\x8c\x47\xcf\x21"
+			  "\x8a\xa5\xdd\x32\x25\x47\xbe\x4f"
+			  "\xf1\x3a\x0b\xa6\xaa\x2d\xcf\x09",
+		.iv	= "\xd9\xe8\xf0\x92\x4e\xfc\x1d\xf2"
+			  "\x81\x37\x7c\x8f\xf1\x59\x09\x20"
+			  "\xf4\x46\x51\x86\x4f\x54\x8b\x32"
+			  "\x58\xd1\x99\x8b\x8c\x03\xeb\x5d",
+		.ptext	= "\xcd\x64\x90\xf9\x7c\xe5\x0e\x5a"
+			  "\x75\xe7\x8e\x39\x86\xec\x20\x43"
+			  "\x8a\x49\x09\x15\x47\xf4\x3c\x89"
+			  "\x21\xeb\xcf\x4e\xcf\x91\xb5\x40"
+			  "\xcd\xe5\x4d\x5c\x6f\xf2\xd2\x80"
+			  "\xfa\xab\xb3\x76\x9f\x7f\x84\x0a",
+		.ctext	= "\x44\x98\x64\x15\xb7\x0b\x80\xa3"
+			  "\xb9\xca\x23\xff\x3b\x0b\x68\x74"
+			  "\xbb\x3e\x20\x19\x9f\x28\x71\x2a"
+			  "\x48\x3c\x7c\xe2\xef\xb5\x10\xac"
+			  "\x82\x9f\xcd\x08\x8f\x6b\x16\x6f"
+			  "\xc3\xbb\x07\xfb\x3c\xb0\x1b\x27",
+		.klen	= 24,
+		.len	= 48,
+	},
+	{
+		.key	= "\xb8\x35\xa2\x5f\x86\xbb\x82\x99"
+			  "\x27\xeb\x01\x3f\x92\xaf\x80\x24"
+			  "\x4c\x66\xa2\x89\xff\x2e\xa2\x25",
+		.iv	= "\x0a\x1d\x96\xd3\xe0\xe8\x0c\x9b"
+			  "\x9d\x6f\x21\x97\xc2\x17\xdb\x39"
+			  "\x3f\xd8\x64\x48\x80\x04\xee\x43"
+			  "\x02\xce\x88\xe2\x81\x81\x5f\x81",
+		.ptext	= "\xb8\xf9\x16\x8b\x25\x68\xd0\x9c"
+			  "\xd2\x28\xac\xa8\x79\xc2\x30\xc1"
+			  "\x31\xde\x1c\x37\x1b\xa2\xb5\xe6"
+			  "\xf0\xd0\xf8\x9c\x7f\xc6\x46\x07"
+			  "\x5c\xc3\x06\xe4\xf0\x02\xec\xf8"
+			  "\x59\x7c\xc2\x5d\xf8\x0c\x21\xae"
+			  "\x9e\x82\xb1\x1a\x5f\x78\x44\x15"
+			  "\x00\xa7\x2e\x52\xc5\x98\x98\x35"
+			  "\x03\xae\xd0\x8e\x07\x57\xe2\x5a"
+			  "\x17\xbf\x52\x40\x54\x5b\x74\xe5"
+			  "\x2d\x35\xaf\x9e\x37\xf7\x7e\x4a"
+			  "\x8c\x9e\xa1\xdc\x40\xb4\x5b\x36"
+			  "\xdc\x3a\x68\xe6\xb7\x35\x0b\x8a"
+			  "\x90\xec\x74\x8f\x09\x9a\x7f\x02"
+			  "\x4d\x03\x46\x35\x62\xb1\xbd\x08"
+			  "\x3f\x54\x2a\x10\x0b\xdc\x69\xaf"
+			  "\x25\x3a\x0c\x5f\xe0\x51\xe7\x11"
+			  "\xb7\x00\xab\xbb\x9a\xb0\xdc\x4d"
+			  "\xc3\x7d\x1a\x6e\xd1\x09\x52\xbd"
+			  "\x6b\x43\x55\x22\x3a\x78\x14\x7d"
+			  "\x79\xfd\x8d\xfc\x9b\x1d\x0f\xa2"
+			  "\xc7\xb9\xf8\x87\xd5\x96\x50\x61"
+			  "\xa7\x5e\x1e\x57\x97\xe0\xad\x2f"
+			  "\x93\xe6\xe8\x83\xec\x85\x26\x5e"
+			  "\xd9\x2a\x15\xe0\xe9\x09\x25\xa1"
+			  "\x77\x2b\x88\xdc\xa4\xa5\x48\xb6"
+			  "\xf7\xcc\xa6\xa9\xba\xf3\x42\x5c"
+			  "\x70\x9d\xe9\x29\xc1\xf1\x33\xdd"
+			  "\x56\x48\x17\x86\x14\x51\x5c\x10"
+			  "\xab\xfd\xd3\x26\x8c\x21\xf5\x93"
+			  "\x1b\xeb\x47\x97\x73\xbb\x88\x10"
+			  "\xf3\xfe\xf5\xde\xf3\x2e\x05\x46"
+			  "\x1c\x0d\xa3\x10\x48\x9c\x71\x16"
+			  "\x78\x33\x4d\x0a\x74\x3b\xe9\x34"
+			  "\x0b\xa7\x0e\x9e\x61\xe9\xe9\xfd"
+			  "\x85\xa0\xcb\x19\xfd\x7c\x33\xe3"
+			  "\x0e\xce\xc2\x6f\x9d\xa4\x2d\x77"
+			  "\xfd\xad\xee\x5e\x08\x3e\xd7\xf5"
+			  "\xfb\xc3\xd7\x93\x96\x08\x96\xca"
+			  "\x58\x81\x16\x9b\x98\x0a\xe2\xef"
+			  "\x7f\xda\x40\xe4\x1f\x46\x9e\x67"
+			  "\x2b\x84\xcb\x42\xc4\xd6\x6a\xcf"
+			  "\x2d\xb2\x33\xc0\x56\xb3\x35\x6f"
+			  "\x29\x36\x8f\x6a\x5b\xec\xd5\x4f"
+			  "\xa0\x70\xff\xb6\x5b\xde\x6a\x93"
+			  "\x20\x3c\xe2\x76\x7a\xef\x3c\x79"
+			  "\x31\x65\xce\x3a\x0e\xd0\xbe\xa8"
+			  "\x21\x95\xc7\x2b\x62\x8e\x67\xdd"
+			  "\x20\x79\xe4\xe5\x01\x15\xc0\xec"
+			  "\x0f\xd9\x23\xc8\xca\xdf\xd4\x7d"
+			  "\x1d\xf8\x64\x4f\x56\xb1\x83\xa7"
+			  "\x43\xbe\xfc\xcf\xc2\x8c\x33\xda"
+			  "\x36\xd0\x52\xef\x9e\x9e\x88\xf4"
+			  "\xa8\x21\x0f\xaa\xee\x8d\xa0\x24"
+			  "\x4d\xcb\xb1\x72\x07\xf0\xc2\x06"
+			  "\x60\x65\x85\x84\x2c\x60\xcf\x61"
+			  "\xe7\x56\x43\x5b\x2b\x50\x74\xfa"
+			  "\xdb\x4e\xea\x88\xd4\xb3\x83\x8f"
+			  "\x6f\x97\x4b\x57\x7a\x64\x64\xae"
+			  "\x0a\x37\x66\xc5\x03\xad\xb5\xf9"
+			  "\x08\xb0\x3a\x74\xde\x97\x51\xff"
+			  "\x48\x4f\x5c\xa4\xf8\x7a\xb4\x05"
+			  "\x27\x70\x52\x86\x1b\x78\xfc\x18"
+			  "\x06\x27\xa9\x62\xf7\xda\xd2\x8e",
+		.ctext	= "\x3b\xe1\xdb\xb3\xc5\x9a\xde\x69"
+			  "\x58\x05\xcc\xeb\x02\x51\x78\x4a"
+			  "\xac\x28\xe9\xed\xd1\xc9\x15\x7d"
+			  "\x33\x7d\xc1\x47\x12\x41\x11\xf8"
+			  "\x4a\x2c\xb7\xa3\x41\xbe\x59\xf7"
+			  "\x22\xdb\x2c\xda\x9c\x00\x61\x9b"
+			  "\x73\xb3\x0b\x84\x2b\xc1\xf3\x80"
+			  "\x84\xeb\x19\x60\x80\x09\xe1\xcd"
+			  "\x16\x3a\x20\x23\xc4\x82\x4f\xba"
+			  "\x3b\x8e\x55\xd7\xa9\x0b\x75\xd0"
+			  "\xda\xce\xd2\xee\x7e\x4b\x7f\x65"
+			  "\x4d\x28\xc5\xd3\x15\x2c\x40\x96"
+			  "\x52\xd4\x18\x61\x2b\xe7\x83\xec"
+			  "\x89\x62\x9c\x4c\x50\xe6\xe2\xbb"
+			  "\x25\xa1\x0f\xa7\xb0\xb4\xb2\xde"
+			  "\x54\x20\xae\xa3\x56\xa5\x26\x4c"
+			  "\xd5\xcc\xe5\xcb\x28\x44\xb1\xef"
+			  "\x67\x2e\x93\x6d\x00\x88\x83\x9a"
+			  "\xf2\x1c\x48\x38\xec\x1a\x24\x90"
+			  "\x73\x0a\xdb\xe8\xce\x95\x7a\x2c"
+			  "\x8c\xe9\xb7\x07\x1d\xb3\xa3\x20"
+			  "\xbe\xad\x61\x84\xac\xde\x76\xb5"
+			  "\xa6\x28\x29\x47\x63\xc4\xfc\x13"
+			  "\x3f\x71\xfb\x58\x37\x34\x82\xed"
+			  "\x9e\x05\x19\x1f\xc1\x67\xc1\xab"
+			  "\xf5\xfd\x7c\xea\xfa\xa4\xf8\x0a"
+			  "\xac\x4c\x92\xdf\x65\x73\xd7\xdb"
+			  "\xed\x2c\xe0\x84\x5f\x57\x8c\x76"
+			  "\x3e\x05\xc0\xc3\x68\x96\x95\x0b"
+			  "\x88\x97\xfe\x2e\x99\xd5\xc2\xb9"
+			  "\x53\x9f\xf3\x32\x10\x1f\x1f\x5d"
+			  "\xdf\x21\x95\x70\x91\xe8\xa1\x3e"
+			  "\x19\x3e\xb6\x0b\xa8\xdb\xf8\xd4"
+			  "\x54\x27\xb8\xab\x5d\x78\x0c\xe6"
+			  "\xb7\x08\xee\xa4\xb6\x6b\xeb\x5a"
+			  "\x89\x69\x2b\xbd\xd4\x21\x5b\xbf"
+			  "\x79\xbb\x0f\xff\xdb\x23\x9a\xeb"
+			  "\x8d\xf2\xc4\x39\xb4\x90\x77\x6f"
+			  "\x68\xe2\xb8\xf3\xf1\x65\x4f\xd5"
+			  "\x24\x80\x06\xaf\x7c\x8d\x15\x0c"
+			  "\xfd\x56\xe5\xe3\x01\xa5\xf7\x1c"
+			  "\x31\xd6\xa2\x01\x1e\x59\xf9\xa9"
+			  "\x42\xd5\xc2\x34\xda\x25\xde\xc6"
+			  "\x5d\x38\xef\xd1\x4c\xc1\xd9\x1b"
+			  "\x98\xfd\xcd\x57\x6f\xfd\x46\x91"
+			  "\x90\x3d\x52\x2b\x2c\x7d\xcf\x71"
+			  "\xcf\xd1\x77\x23\x71\x36\xb1\xce"
+			  "\xc7\x5d\xf0\x5b\x44\x3d\x43\x71"
+			  "\xac\xb8\xa0\x6a\xea\x89\x5c\xff"
+			  "\x81\x73\xd4\x83\xd1\xc9\xe9\xe2"
+			  "\xa8\xa6\x0f\x36\xe6\xaa\x57\xd4"
+			  "\x27\xd2\xc9\xda\x94\x02\x1f\xfb"
+			  "\xe1\xa1\x07\xbe\xe1\x1b\x15\x94"
+			  "\x1e\xac\x2f\x57\xbb\x41\x22\xaf"
+			  "\x60\x5e\xcc\x66\xcb\x16\x62\xab"
+			  "\xb8\x7c\x99\xf4\x84\x93\x0c\xc2"
+			  "\xa2\x49\xe4\xfd\x17\x55\xe1\xa6"
+			  "\x8d\x5b\xc6\x1b\xc8\xac\xec\x11"
+			  "\x33\xcf\xb0\xe8\xc7\x28\x4f\xb2"
+			  "\x5c\xa6\xe2\x71\xab\x80\x0a\xa7"
+			  "\x5c\x59\x50\x9f\x7a\x32\xb7\xe5"
+			  "\x24\x9a\x8e\x25\x21\x2e\xb7\x18"
+			  "\xd0\xf2\xe7\x27\x6f\xda\xc1\x00"
+			  "\xd9\xa6\x03\x59\xac\x4b\xcb\xba",
+		.klen	= 24,
+		.len	= 512,
+	},
+	{
+		.key	= "\x9e\xeb\xb2\x49\x3c\x1c\xf5\xf4"
+			  "\x6a\x99\xc2\xc4\xdf\xb1\xf4\xdd"
+			  "\x75\x20\x57\xea\x2c\x4f\xcd\xb2"
+			  "\xa5\x3d\x7b\x49\x1e\xab\xfd\x0f",
+		.iv	= "\xdf\x63\xd4\xab\xd2\x49\xf3\xd8"
+			  "\x33\x81\x37\x60\x7d\xfa\x73\x08"
+			  "\xd8\x49\x6d\x80\xe8\x2f\x62\x54"
+			  "\xeb\x0e\xa9\x39\x5b\x45\x7f\x8a",
+		.ptext	= "\x67\xc9\xf2\x30\x84\x41\x8e\x43"
+			  "\xfb\xf3\xb3\x3e\x79\x36\x7f\xe8",
+		.ctext	= "\x27\x38\x78\x47\x16\xd9\x71\x35"
+			  "\x2e\x7e\xdd\x7e\x43\x3c\xb8\x40",
+		.klen	= 32,
+		.len	= 16,
+	},
+	{
+		.key	= "\x93\xfa\x7e\xe2\x0e\x67\xc4\x39"
+			  "\xe7\xca\x47\x95\x68\x9d\x5e\x5a"
+			  "\x7c\x26\x19\xab\xc6\xca\x6a\x4c"
+			  "\x45\xa6\x96\x42\xae\x6c\xff\xe7",
+		.iv	= "\xea\x82\x47\x95\x3b\x22\xa1\x3a"
+			  "\x6a\xca\x24\x4c\x50\x7e\x23\xcd"
+			  "\x0e\x50\xe5\x41\xb6\x65\x29\xd8"
+			  "\x30\x23\x00\xd2\x54\xa7\xd6\x56",
+		.ptext	= "\xdb\x1f\x1f\xec\xad\x83\x6e\x5d"
+			  "\x19\xa5\xf6\x3b\xb4\x93\x5a\x57"
+			  "\x6f",
+		.ctext	= "\xf1\x46\x6e\x9d\xb3\x01\xf0\x6b"
+			  "\xc2\xac\x57\x88\x48\x6d\x40\x72"
+			  "\x68",
+		.klen	= 32,
+		.len	= 17,
+	},
+	{
+		.key	= "\x36\x2b\x57\x97\xf8\x5d\xcd\x99"
+			  "\x5f\x1a\x5a\x44\x1d\x92\x0f\x27"
+			  "\xcc\x16\xd7\x2b\x85\x63\x99\xd3"
+			  "\xba\x96\xa1\xdb\xd2\x60\x68\xda",
+		.iv	= "\xef\x58\x69\xb1\x2c\x5e\x9a\x47"
+			  "\x24\xc1\xb1\x69\xe1\x12\x93\x8f"
+			  "\x43\x3d\x6d\x00\xdb\x5e\xd8\xd9"
+			  "\x12\x9a\xfe\xd9\xff\x2d\xaa\xc4",
+		.ptext	= "\x5e\xa8\x68\x19\x85\x98\x12\x23"
+			  "\x26\x0a\xcc\xdb\x0a\x04\xb9\xdf"
+			  "\x4d\xb3\x48\x7b\xb0\xe3\xc8\x19"
+			  "\x43\x5a\x46\x06\x94\x2d\xf2",
+		.ctext	= "\xdb\xfd\xc8\x03\xd0\xec\xc1\xfe"
+			  "\xbd\x64\x37\xb8\x82\x43\x62\x4e"
+			  "\x7e\x54\xa3\xe2\x24\xa7\x27\xe8"
+			  "\xa4\xd5\xb3\x6c\xb2\x26\xb4",
+		.klen	= 32,
+		.len	= 31,
+	},
+	{
+		.key	= "\x03\x65\x03\x6e\x4d\xe6\xe8\x4e"
+			  "\x8b\xbe\x22\x19\x48\x31\xee\xd9"
+			  "\xa0\x91\x21\xbe\x62\x89\xde\x78"
+			  "\xd9\xb0\x36\xa3\x3c\xce\x43\xd5",
+		.iv	= "\xa9\xc3\x4b\xe7\x0f\xfc\x6d\xbf"
+			  "\x56\x27\x21\x1c\xfc\xd6\x04\x10"
+			  "\x5f\x43\xe2\x30\x35\x29\x6c\x10"
+			  "\x90\xf1\xbf\x61\xed\x0f\x8a\x91",
+		.ptext	= "\x07\xaa\x02\x26\xb4\x98\x11\x5e"
+			  "\x33\x41\x21\x51\x51\x63\x2c\x72"
+			  "\x00\xab\x32\xa7\x1c\xc8\x3c\x9c"
+			  "\x25\x0e\x8b\x9a\xdf\x85\xed\x2d"
+			  "\xf4\xf2\xbc\x55\xca\x92\x6d\x22"
+			  "\xfd\x22\x3b\x42\x4c\x0b\x74\xec",
+		.ctext	= "\x7b\xb1\x43\x6d\xd8\x72\x6c\xf6"
+			  "\x67\x6a\x00\xc4\xf1\xf0\xf5\xa4"
+			  "\xfc\x60\x91\xab\x46\x0b\x15\xfc"
+			  "\xd7\xc1\x28\x15\xa1\xfc\xf7\x68"
+			  "\x8e\xcc\x27\x62\x00\x64\x56\x72"
+			  "\xa6\x17\xd7\x3f\x67\x80\x10\x58",
+		.klen	= 32,
+		.len	= 48,
+	},
+	{
+		.key	= "\xa5\x28\x24\x34\x1a\x3c\xd8\xf7"
+			  "\x05\x91\x8f\xee\x85\x1f\x35\x7f"
+			  "\x80\x3d\xfc\x9b\x94\xf6\xfc\x9e"
+			  "\x19\x09\x00\xa9\x04\x31\x4f\x11",
+		.iv	= "\xa1\xba\x49\x95\xff\x34\x6d\xb8"
+			  "\xcd\x87\x5d\x5e\xfd\xea\x85\xdb"
+			  "\x8a\x7b\x5e\xb2\x5d\x57\xdd\x62"
+			  "\xac\xa9\x8c\x41\x42\x94\x75\xb7",
+		.ptext	= "\x69\xb4\xe8\x8c\x37\xe8\x67\x82"
+			  "\xf1\xec\x5d\x04\xe5\x14\x91\x13"
+			  "\xdf\xf2\x87\x1b\x69\x81\x1d\x71"
+			  "\x70\x9e\x9c\x3b\xde\x49\x70\x11"
+			  "\xa0\xa3\xdb\x0d\x54\x4f\x66\x69"
+			  "\xd7\xdb\x80\xa7\x70\x92\x68\xce"
+			  "\x81\x04\x2c\xc6\xab\xae\xe5\x60"
+			  "\x15\xe9\x6f\xef\xaa\x8f\xa7\xa7"
+			  "\x63\x8f\xf2\xf0\x77\xf1\xa8\xea"
+			  "\xe1\xb7\x1f\x9e\xab\x9e\x4b\x3f"
+			  "\x07\x87\x5b\x6f\xcd\xa8\xaf\xb9"
+			  "\xfa\x70\x0b\x52\xb8\xa8\xa7\x9e"
+			  "\x07\x5f\xa6\x0e\xb3\x9b\x79\x13"
+			  "\x79\xc3\x3e\x8d\x1c\x2c\x68\xc8"
+			  "\x51\x1d\x3c\x7b\x7d\x79\x77\x2a"
+			  "\x56\x65\xc5\x54\x23\x28\xb0\x03",
+		.ctext	= "\xeb\xf9\x98\x86\x3c\x40\x9f\x16"
+			  "\x84\x01\xf9\x06\x0f\xeb\x3c\xa9"
+			  "\x4c\xa4\x8e\x5d\xc3\x8d\xe5\xd3"
+			  "\xae\xa6\xe6\xcc\xd6\x2d\x37\x4f"
+			  "\x99\xc8\xa3\x21\x46\xb8\x69\xf2"
+			  "\xe3\x14\x89\xd7\xb9\xf5\x9e\x4e"
+			  "\x07\x93\x6f\x78\x8e\x6b\xea\x8f"
+			  "\xfb\x43\xb8\x3e\x9b\x4c\x1d\x7e"
+			  "\x20\x9a\xc5\x87\xee\xaf\xf6\xf9"
+			  "\x46\xc5\x18\x8a\xe8\x69\xe7\x96"
+			  "\x52\x55\x5f\x00\x1e\x1a\xdc\xcc"
+			  "\x13\xa5\xee\xff\x4b\x27\xca\xdc"
+			  "\x10\xa6\x48\x76\x98\x43\x94\xa3"
+			  "\xc7\xe2\xc9\x65\x9b\x08\x14\x26"
+			  "\x1d\x68\xfb\x15\x0a\x33\x49\x84"
+			  "\x84\x33\x5a\x1b\x24\x46\x31\x92",
+		.klen	= 32,
+		.len	= 128,
+	},
+	{
+		.key	= "\x36\x45\x11\xa2\x98\x5f\x96\x7c"
+			  "\xc6\xb4\x94\x31\x0a\x67\x09\x32"
+			  "\x6c\x6f\x6f\x00\xf0\x17\xcb\xac"
+			  "\xa5\xa9\x47\x9e\x2e\x85\x2f\xfa",
+		.iv	= "\x28\x88\xaa\x9b\x59\x3b\x1e\x97"
+			  "\x82\xe5\x5c\x9e\x6d\x14\x11\x19"
+			  "\x6e\x38\x8f\xd5\x40\x2b\xca\xf9"
+			  "\x7b\x4c\xe4\xa3\xd0\xd2\x8a\x13",
+		.ptext	= "\x95\xd2\xf7\x71\x1b\xca\xa5\x86"
+			  "\xd9\x48\x01\x93\x2f\x79\x55\x29"
+			  "\x71\x13\x15\x0e\xe6\x12\xbc\x4d"
+			  "\x8a\x31\xe3\x40\x2a\xc6\x5e\x0d"
+			  "\x68\xbb\x4a\x62\x8d\xc7\x45\x77"
+			  "\xd2\xb8\xc7\x1d\xf1\xd2\x5d\x97"
+			  "\xcf\xac\x52\xe5\x32\x77\xb6\xda"
+			  "\x30\x85\xcf\x2b\x98\xe9\xaa\x34"
+			  "\x62\xb5\x23\x9e\xb7\xa6\xd4\xe0"
+			  "\xb4\x58\x18\x8c\x4d\xde\x4d\x01"
+			  "\x83\x89\x24\xca\xfb\x11\xd4\x82"
+			  "\x30\x7a\x81\x35\xa0\xb4\xd4\xb6"
+			  "\x84\xea\x47\x91\x8c\x19\x86\x25"
+			  "\xa6\x06\x8d\x78\xe6\xed\x87\xeb"
+			  "\xda\xea\x73\x7c\xbf\x66\xb8\x72"
+			  "\xe3\x0a\xb8\x0c\xcb\x1a\x73\xf1"
+			  "\xa7\xca\x0a\xde\x57\x2b\xbd\x2b"
+			  "\xeb\x8b\x24\x38\x22\xd3\x0e\x1f"
+			  "\x17\xa0\x84\x98\x31\x77\xfd\x34"
+			  "\x6a\x4e\x3d\x84\x4c\x0e\xfb\xed"
+			  "\xc8\x2a\x51\xfa\xd8\x73\x21\x8a"
+			  "\xdb\xb5\xfe\x1f\xee\xc4\xe8\x65"
+			  "\x54\x84\xdd\x96\x6d\xfd\xd3\x31"
+			  "\x77\x36\x52\x6b\x80\x4f\x9e\xb4"
+			  "\xa2\x55\xbf\x66\x41\x49\x4e\x87"
+			  "\xa7\x0c\xca\xe7\xa5\xc5\xf6\x6f"
+			  "\x27\x56\xe2\x48\x22\xdd\x5f\x59"
+			  "\x3c\xf1\x9f\x83\xe5\x2d\xfb\x71"
+			  "\xad\xd1\xae\x1b\x20\x5c\x47\xb7"
+			  "\x3b\xd3\x14\xce\x81\x42\xb1\x0a"
+			  "\xf0\x49\xfa\xc2\xe7\x86\xbf\xcd"
+			  "\xb0\x95\x9f\x8f\x79\x41\x54",
+		.ctext	= "\xf6\x57\x51\xc4\x25\x61\x2d\xfa"
+			  "\xd6\xd9\x3f\x9a\x81\x51\xdd\x8e"
+			  "\x3d\xe7\xaa\x2d\xb1\xda\xc8\xa6"
+			  "\x9d\xaa\x3c\xab\x62\xf2\x80\xc3"
+			  "\x2c\xe7\x58\x72\x1d\x44\xc5\x28"
+			  "\x7f\xb4\xf9\xbc\x9c\xb2\xab\x8e"
+			  "\xfa\xd1\x4d\x72\xd9\x79\xf5\xa0"
+			  "\x24\x3e\x90\x25\x31\x14\x38\x45"
+			  "\x59\xc8\xf6\xe2\xc6\xf6\xc1\xa7"
+			  "\xb2\xf8\xa7\xa9\x2b\x6f\x12\x3a"
+			  "\xb0\x81\xa4\x08\x57\x59\xb1\x56"
+			  "\x4c\x8f\x18\x55\x33\x5f\xd6\x6a"
+			  "\xc6\xa0\x4b\xd6\x6b\x64\x3e\x9e"
+			  "\xfd\x66\x16\xe2\xdb\xeb\x5f\xb3"
+			  "\x50\x50\x3e\xde\x8d\x72\x76\x01"
+			  "\xbe\xcc\xc9\x52\x09\x2d\x8d\xe7"
+			  "\xd6\xc3\x66\xdb\x36\x08\xd1\x77"
+			  "\xc8\x73\x46\x26\x24\x29\xbf\x68"
+			  "\x2d\x2a\x99\x43\x56\x55\xe4\x93"
+			  "\xaf\xae\x4d\xe7\x55\x4a\xc0\x45"
+			  "\x26\xeb\x3b\x12\x90\x7c\xdc\xd1"
+			  "\xd5\x6f\x0a\xd0\xa9\xd7\x4b\x89"
+			  "\x0b\x07\xd8\x86\xad\xa1\xc4\x69"
+			  "\x1f\x5e\x8b\xc4\x9e\x91\x41\x25"
+			  "\x56\x98\x69\x78\x3a\x9e\xae\x91"
+			  "\xd8\xd9\xfa\xfb\xff\x81\x25\x09"
+			  "\xfc\xed\x2d\x87\xbc\x04\x62\x97"
+			  "\x35\xe1\x26\xc2\x46\x1c\xcf\xd7"
+			  "\x14\xed\x02\x09\xa5\xb2\xb6\xaa"
+			  "\x27\x4e\x61\xb3\x71\x6b\x47\x16"
+			  "\xb7\xe8\xd4\xaf\x52\xeb\x6a\x6b"
+			  "\xdb\x4c\x65\x21\x9e\x1c\x36",
+		.klen	= 32,
+		.len	= 255,
+	},
+	{
+		.key	= "\xd3\x81\x72\x18\x23\xff\x6f\x4a"
+			  "\x25\x74\x29\x0d\x51\x8a\x0e\x13"
+			  "\xc1\x53\x5d\x30\x8d\xee\x75\x0d"
+			  "\x14\xd6\x69\xc9\x15\xa9\x0c\x60",
+		.iv	= "\x65\x9b\xd4\xa8\x7d\x29\x1d\xf4"
+			  "\xc4\xd6\x9b\x6a\x28\xab\x64\xe2"
+			  "\x62\x81\x97\xc5\x81\xaa\xf9\x44"
+			  "\xc1\x72\x59\x82\xaf\x16\xc8\x2c",
+		.ptext	= "\xc7\x6b\x52\x6a\x10\xf0\xcc\x09"
+			  "\xc1\x12\x1d\x6d\x21\xa6\x78\xf5"
+			  "\x05\xa3\x69\x60\x91\x36\x98\x57"
+			  "\xba\x0c\x14\xcc\xf3\x2d\x73\x03"
+			  "\xc6\xb2\x5f\xc8\x16\x27\x37\x5d"
+			  "\xd0\x0b\x87\xb2\x50\x94\x7b\x58"
+			  "\x04\xf4\xe0\x7f\x6e\x57\x8e\xc9"
+			  "\x41\x84\xc1\xb1\x7e\x4b\x91\x12"
+			  "\x3a\x8b\x5d\x50\x82\x7b\xcb\xd9"
+			  "\x9a\xd9\x4e\x18\x06\x23\x9e\xd4"
+			  "\xa5\x20\x98\xef\xb5\xda\xe5\xc0"
+			  "\x8a\x6a\x83\x77\x15\x84\x1e\xae"
+			  "\x78\x94\x9d\xdf\xb7\xd1\xea\x67"
+			  "\xaa\xb0\x14\x15\xfa\x67\x21\x84"
+			  "\xd3\x41\x2a\xce\xba\x4b\x4a\xe8"
+			  "\x95\x62\xa9\x55\xf0\x80\xad\xbd"
+			  "\xab\xaf\xdd\x4f\xa5\x7c\x13\x36"
+			  "\xed\x5e\x4f\x72\xad\x4b\xf1\xd0"
+			  "\x88\x4e\xec\x2c\x88\x10\x5e\xea"
+			  "\x12\xc0\x16\x01\x29\xa3\xa0\x55"
+			  "\xaa\x68\xf3\xe9\x9d\x3b\x0d\x3b"
+			  "\x6d\xec\xf8\xa0\x2d\xf0\x90\x8d"
+			  "\x1c\xe2\x88\xd4\x24\x71\xf9\xb3"
+			  "\xc1\x9f\xc5\xd6\x76\x70\xc5\x2e"
+			  "\x9c\xac\xdb\x90\xbd\x83\x72\xba"
+			  "\x6e\xb5\xa5\x53\x83\xa9\xa5\xbf"
+			  "\x7d\x06\x0e\x3c\x2a\xd2\x04\xb5"
+			  "\x1e\x19\x38\x09\x16\xd2\x82\x1f"
+			  "\x75\x18\x56\xb8\x96\x0b\xa6\xf9"
+			  "\xcf\x62\xd9\x32\x5d\xa9\xd7\x1d"
+			  "\xec\xe4\xdf\x1b\xbe\xf1\x36\xee"
+			  "\xe3\x7b\xb5\x2f\xee\xf8\x53\x3d"
+			  "\x6a\xb7\x70\xa9\xfc\x9c\x57\x25"
+			  "\xf2\x89\x10\xd3\xb8\xa8\x8c\x30"
+			  "\xae\x23\x4f\x0e\x13\x66\x4f\xe1"
+			  "\xb6\xc0\xe4\xf8\xef\x93\xbd\x6e"
+			  "\x15\x85\x6b\xe3\x60\x81\x1d\x68"
+			  "\xd7\x31\x87\x89\x09\xab\xd5\x96"
+			  "\x1d\xf3\x6d\x67\x80\xca\x07\x31"
+			  "\x5d\xa7\xe4\xfb\x3e\xf2\x9b\x33"
+			  "\x52\x18\xc8\x30\xfe\x2d\xca\x1e"
+			  "\x79\x92\x7a\x60\x5c\xb6\x58\x87"
+			  "\xa4\x36\xa2\x67\x92\x8b\xa4\xb7"
+			  "\xf1\x86\xdf\xdc\xc0\x7e\x8f\x63"
+			  "\xd2\xa2\xdc\x78\xeb\x4f\xd8\x96"
+			  "\x47\xca\xb8\x91\xf9\xf7\x94\x21"
+			  "\x5f\x9a\x9f\x5b\xb8\x40\x41\x4b"
+			  "\x66\x69\x6a\x72\xd0\xcb\x70\xb7"
+			  "\x93\xb5\x37\x96\x05\x37\x4f\xe5"
+			  "\x8c\xa7\x5a\x4e\x8b\xb7\x84\xea"
+			  "\xc7\xfc\x19\x6e\x1f\x5a\xa1\xac"
+			  "\x18\x7d\x52\x3b\xb3\x34\x62\x99"
+			  "\xe4\x9e\x31\x04\x3f\xc0\x8d\x84"
+			  "\x17\x7c\x25\x48\x52\x67\x11\x27"
+			  "\x67\xbb\x5a\x85\xca\x56\xb2\x5c"
+			  "\xe6\xec\xd5\x96\x3d\x15\xfc\xfb"
+			  "\x22\x25\xf4\x13\xe5\x93\x4b\x9a"
+			  "\x77\xf1\x52\x18\xfa\x16\x5e\x49"
+			  "\x03\x45\xa8\x08\xfa\xb3\x41\x92"
+			  "\x79\x50\x33\xca\xd0\xd7\x42\x55"
+			  "\xc3\x9a\x0c\x4e\xd9\xa4\x3c\x86"
+			  "\x80\x9f\x53\xd1\xa4\x2e\xd1\xbc"
+			  "\xf1\x54\x6e\x93\xa4\x65\x99\x8e"
+			  "\xdf\x29\xc0\x64\x63\x07\xbb\xea",
+		.ctext	= "\x9f\x72\x87\xc7\x17\xfb\x20\x15"
+			  "\x65\xb3\x55\xa8\x1c\x8e\x52\x32"
+			  "\xb1\x82\x8d\xbf\xb5\x9f\x10\x0a"
+			  "\xe8\x0c\x70\x62\xef\x89\xb6\x1f"
+			  "\x73\xcc\xe4\xcc\x7a\x3a\x75\x4a"
+			  "\x26\xe7\xf5\xd7\x7b\x17\x39\x2d"
+			  "\xd2\x27\x6e\xf9\x2f\x9e\xe2\xf6"
+			  "\xfa\x16\xc2\xf2\x49\x26\xa7\x5b"
+			  "\xe7\xca\x25\x0e\x45\xa0\x34\xc2"
+			  "\x9a\x37\x79\x7e\x7c\x58\x18\x94"
+			  "\x10\xa8\x7c\x48\xa9\xd7\x63\x89"
+			  "\x9e\x61\x4d\x26\x34\xd9\xf0\xb1"
+			  "\x2d\x17\x2c\x6f\x7c\x35\x0e\xbe"
+			  "\x77\x71\x7c\x17\x5b\xab\x70\xdb"
+			  "\x2f\x54\x0f\xa9\xc8\xf4\xf5\xab"
+			  "\x52\x04\x3a\xb8\x03\xa7\xfd\x57"
+			  "\x45\x5e\xbc\x77\xe1\xee\x79\x8c"
+			  "\x58\x7b\x1f\xf7\x75\xde\x68\x17"
+			  "\x98\x85\x8a\x18\x5c\xd2\x39\x78"
+			  "\x7a\x6f\x26\x6e\xe1\x13\x91\xdd"
+			  "\xdf\x0e\x6e\x67\xcc\x51\x53\xd8"
+			  "\x17\x5e\xce\xa7\xe4\xaf\xfa\xf3"
+			  "\x4f\x9f\x01\x9b\x04\xe7\xfc\xf9"
+			  "\x6a\xdc\x1d\x0c\x9a\xaa\x3a\x7a"
+			  "\x73\x03\xdf\xbf\x3b\x82\xbe\xb0"
+			  "\xb4\xa4\xcf\x07\xd7\xde\x71\x25"
+			  "\xc5\x10\xee\x0a\x15\x96\x8b\x4f"
+			  "\xfe\xb8\x28\xbd\x4a\xcd\xeb\x9f"
+			  "\x5d\x00\xc1\xee\xe8\x16\x44\xec"
+			  "\xe9\x7b\xd6\x85\x17\x29\xcf\x58"
+			  "\x20\xab\xf7\xce\x6b\xe7\x71\x7d"
+			  "\x4f\xa8\xb0\xe9\x7d\x70\xd6\x0b"
+			  "\x2e\x20\xb1\x1a\x63\x37\xaa\x2c"
+			  "\x94\xee\xd5\xf6\x58\x2a\xf4\x7a"
+			  "\x4c\xba\xf5\xe9\x3c\x6f\x95\x13"
+			  "\x5f\x96\x81\x5b\xb5\x62\xf2\xd7"
+			  "\x8d\xbe\xa1\x31\x51\xe6\xfe\xc9"
+			  "\x07\x7d\x0f\x00\x3a\x66\x8c\x4b"
+			  "\x94\xaa\xe5\x56\xde\xcd\x74\xa7"
+			  "\x48\x67\x6f\xed\xc9\x6a\xef\xaf"
+			  "\x9a\xb7\xae\x60\xfa\xc0\x37\x39"
+			  "\xa5\x25\xe5\x22\xea\x82\x55\x68"
+			  "\x3e\x30\xc3\x5a\xb6\x29\x73\x7a"
+			  "\xb6\xfb\x34\xee\x51\x7c\x54\xe5"
+			  "\x01\x4d\x72\x25\x32\x4a\xa3\x68"
+			  "\x80\x9a\x89\xc5\x11\x66\x4c\x8c"
+			  "\x44\x50\xbe\xd7\xa0\xee\xa6\xbb"
+			  "\x92\x0c\xe6\xd7\x83\x51\xb1\x69"
+			  "\x63\x40\xf3\xf4\x92\x84\xc4\x38"
+			  "\x29\xfb\xb4\x84\xa0\x19\x75\x16"
+			  "\x60\xbf\x0a\x9c\x89\xee\xad\xb4"
+			  "\x43\xf9\x71\x39\x45\x7c\x24\x83"
+			  "\x30\xbb\xee\x28\xb0\x86\x7b\xec"
+			  "\x93\xc1\xbf\xb9\x97\x1b\x96\xef"
+			  "\xee\x58\x35\x61\x12\x19\xda\x25"
+			  "\x77\xe5\x80\x1a\x31\x27\x9b\xe4"
+			  "\xda\x8b\x7e\x51\x4d\xcb\x01\x19"
+			  "\x4f\xdc\x92\x1a\x17\xd5\x6b\xf4"
+			  "\x50\xe3\x06\xe4\x76\x9f\x65\x00"
+			  "\xbd\x7a\xe2\x64\x26\xf2\xe4\x7e"
+			  "\x40\xf2\x80\xab\x62\xd5\xef\x23"
+			  "\x8b\xfb\x6f\x24\x6e\x9b\x66\x0e"
+			  "\xf4\x1c\x24\x1e\x1d\x26\x95\x09"
+			  "\x94\x3c\xb2\xb6\x02\xa7\xd9\x9a",
+		.klen	= 32,
+		.len	= 512,
+	},
+
+};
 
 #endif	/* _CRYPTO_TESTMGR_H */
diff --git a/crypto/xctr.c b/crypto/xctr.c
new file mode 100644
index 0000000000000..5c00147e8ec40
--- /dev/null
+++ b/crypto/xctr.c
@@ -0,0 +1,191 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * XCTR: XOR Counter mode - Adapted from ctr.c
+ *
+ * (C) Copyright IBM Corp. 2007 - Joy Latten <latten@us.ibm.com>
+ * Copyright 2021 Google LLC
+ */
+
+/*
+ * XCTR mode is a blockcipher mode of operation used to implement HCTR2. XCTR is
+ * closely related to the CTR mode of operation; the main difference is that CTR
+ * generates the keystream using E(CTR + IV) whereas XCTR generates the
+ * keystream using E(CTR ^ IV). This allows implementations to avoid dealing
+ * with multi-limb integers (as is required in CTR mode). XCTR is also specified
+ * using little-endian arithmetic which makes it slightly faster on LE machines.
+ *
+ * See the HCTR2 paper for more details:
+ *	Length-preserving encryption with HCTR2
+ *      (https://eprint.iacr.org/2021/1441.pdf)
+ */
+
+#include <crypto/algapi.h>
+#include <crypto/internal/cipher.h>
+#include <crypto/internal/skcipher.h>
+#include <linux/err.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+
+/* For now this implementation is limited to 16-byte blocks for simplicity */
+#define XCTR_BLOCKSIZE 16
+
+static void crypto_xctr_crypt_final(struct skcipher_walk *walk,
+				   struct crypto_cipher *tfm, u32 byte_ctr)
+{
+	u8 keystream[XCTR_BLOCKSIZE];	/* receives E(IV ^ block number) */
+	const u8 *src = walk->src.virt.addr;
+	u8 *dst = walk->dst.virt.addr;
+	unsigned int nbytes = walk->nbytes;	/* only this much keystream is used */
+	__le32 ctr32 = cpu_to_le32(byte_ctr / XCTR_BLOCKSIZE + 1); /* LE, starts at 1 */
+
+	crypto_xor(walk->iv, (u8 *)&ctr32, sizeof(ctr32)); /* IV ^= block number */
+	crypto_cipher_encrypt_one(tfm, keystream, walk->iv);
+	crypto_xor_cpy(dst, keystream, src, nbytes); /* dst = keystream ^ src */
+	crypto_xor(walk->iv, (u8 *)&ctr32, sizeof(ctr32)); /* restore IV */
+}
+
+static int crypto_xctr_crypt_segment(struct skcipher_walk *walk,
+				    struct crypto_cipher *tfm, u32 byte_ctr)
+{
+	void (*fn)(struct crypto_tfm *, u8 *, const u8 *) =
+		   crypto_cipher_alg(tfm)->cia_encrypt; /* underlying cipher's encrypt */
+	const u8 *src = walk->src.virt.addr;
+	u8 *dst = walk->dst.virt.addr;
+	unsigned int nbytes = walk->nbytes;
+	__le32 ctr32 = cpu_to_le32(byte_ctr / XCTR_BLOCKSIZE + 1); /* LE, starts at 1 */
+
+	do {	/* one XCTR_BLOCKSIZE block per iteration */
+		crypto_xor(walk->iv, (u8 *)&ctr32, sizeof(ctr32)); /* IV ^= block number */
+		fn(crypto_cipher_tfm(tfm), dst, walk->iv); /* dst = E(IV ^ block number) */
+		crypto_xor(dst, src, XCTR_BLOCKSIZE); /* dst ^= src */
+		crypto_xor(walk->iv, (u8 *)&ctr32, sizeof(ctr32)); /* restore IV */
+
+		le32_add_cpu(&ctr32, 1); /* next block number */
+
+		src += XCTR_BLOCKSIZE;
+		dst += XCTR_BLOCKSIZE;
+	} while ((nbytes -= XCTR_BLOCKSIZE) >= XCTR_BLOCKSIZE);
+
+	return nbytes; /* bytes left unprocessed (less than one block) */
+}
+
+static int crypto_xctr_crypt_inplace(struct skcipher_walk *walk,
+				    struct crypto_cipher *tfm, u32 byte_ctr)
+{
+	void (*fn)(struct crypto_tfm *, u8 *, const u8 *) =
+		   crypto_cipher_alg(tfm)->cia_encrypt; /* underlying cipher's encrypt */
+	unsigned long alignmask = crypto_cipher_alignmask(tfm);
+	unsigned int nbytes = walk->nbytes;
+	u8 *data = walk->src.virt.addr;	/* read and overwritten in place */
+	u8 tmp[XCTR_BLOCKSIZE + MAX_CIPHER_ALIGNMASK]; /* slack for alignment */
+	u8 *keystream = PTR_ALIGN(tmp + 0, alignmask + 1); /* aligned block in tmp */
+	__le32 ctr32 = cpu_to_le32(byte_ctr / XCTR_BLOCKSIZE + 1); /* LE, starts at 1 */
+
+	do {	/* one XCTR_BLOCKSIZE block per iteration */
+		crypto_xor(walk->iv, (u8 *)&ctr32, sizeof(ctr32)); /* IV ^= block number */
+		fn(crypto_cipher_tfm(tfm), keystream, walk->iv);
+		crypto_xor(data, keystream, XCTR_BLOCKSIZE); /* data ^= keystream */
+		crypto_xor(walk->iv, (u8 *)&ctr32, sizeof(ctr32)); /* restore IV */
+
+		le32_add_cpu(&ctr32, 1); /* next block number */
+
+		data += XCTR_BLOCKSIZE;
+	} while ((nbytes -= XCTR_BLOCKSIZE) >= XCTR_BLOCKSIZE);
+
+	return nbytes; /* bytes left unprocessed (less than one block) */
+}
+
+static int crypto_xctr_crypt(struct skcipher_request *req)
+{
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct crypto_cipher *cipher = skcipher_cipher_simple(tfm);
+	struct skcipher_walk walk;
+	unsigned int nbytes;
+	int err;
+	u32 byte_ctr = 0;	/* bytes processed so far; helpers derive the block number */
+
+	err = skcipher_walk_virt(&walk, req, false);
+
+	while (walk.nbytes >= XCTR_BLOCKSIZE) {	/* at least one full block */
+		if (walk.src.virt.addr == walk.dst.virt.addr)
+			nbytes = crypto_xctr_crypt_inplace(&walk, cipher,
+							   byte_ctr);
+		else
+			nbytes = crypto_xctr_crypt_segment(&walk, cipher,
+							   byte_ctr);
+
+		byte_ctr += walk.nbytes - nbytes; /* bytes consumed in this step */
+		err = skcipher_walk_done(&walk, nbytes); /* nbytes = unprocessed tail */
+	}
+
+	if (walk.nbytes) {	/* trailing partial block */
+		crypto_xctr_crypt_final(&walk, cipher, byte_ctr);
+		err = skcipher_walk_done(&walk, 0);
+	}
+
+	return err;
+}
+
+static int crypto_xctr_create(struct crypto_template *tmpl, struct rtattr **tb)
+{
+	struct skcipher_instance *inst;
+	struct crypto_alg *alg;	/* the underlying cipher algorithm */
+	int err;
+
+	inst = skcipher_alloc_instance_simple(tmpl, tb);
+	if (IS_ERR(inst))
+		return PTR_ERR(inst);
+
+	alg = skcipher_ialg_simple(inst);
+
+	/* Reject ciphers whose block size is not XCTR_BLOCKSIZE (16 bytes). */
+	err = -EINVAL;
+	if (alg->cra_blocksize != XCTR_BLOCKSIZE)
+		goto out_free_inst;
+
+	/* XCTR mode is a stream cipher, so advertise a block size of 1. */
+	inst->alg.base.cra_blocksize = 1;
+
+	/*
+	 * To simplify the implementation, configure the skcipher walk to only
+	 * give a partial block at the very end, never earlier.
+	 */
+	inst->alg.chunksize = alg->cra_blocksize;
+
+	inst->alg.encrypt = crypto_xctr_crypt;
+	inst->alg.decrypt = crypto_xctr_crypt; /* same routine as encrypt */
+
+	err = skcipher_register_instance(tmpl, inst);
+	if (err) {
+out_free_inst:	/* also reached by goto with err = -EINVAL */
+		inst->free(inst);
+	}
+
+	return err;
+}
+
+static struct crypto_template crypto_xctr_tmpl = {
+	.name = "xctr",
+	.create = crypto_xctr_create,	/* builds an instance around a cipher */
+	.module = THIS_MODULE,
+};
+
+static int __init crypto_xctr_module_init(void)
+{
+	return crypto_register_template(&crypto_xctr_tmpl); /* result is the init status */
+}
+
+static void __exit crypto_xctr_module_exit(void)
+{
+	crypto_unregister_template(&crypto_xctr_tmpl); /* undoes module_init */
+}
+
+subsys_initcall(crypto_xctr_module_init);
+module_exit(crypto_xctr_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("XCTR block cipher mode of operation");
+MODULE_ALIAS_CRYPTO("xctr");
+MODULE_IMPORT_NS(CRYPTO_INTERNAL);
diff --git a/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-cipher.c b/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-cipher.c
index 5bb950182026f..910d6751644cf 100644
--- a/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-cipher.c
+++ b/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-cipher.c
@@ -170,6 +170,7 @@ dma_iv_error:
 	while (i >= 0) {
 		dma_unmap_single(ss->dev, rctx->p_iv[i], ivsize, DMA_TO_DEVICE);
 		memzero_explicit(sf->iv[i], ivsize);
+		i--;
 	}
 	return err;
 }
diff --git a/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-core.c b/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-core.c
index 98593a0cff694..ac2329e2b0e58 100644
--- a/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-core.c
+++ b/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-core.c
@@ -528,25 +528,33 @@ static int allocate_flows(struct sun8i_ss_dev *ss)
 
 		ss->flows[i].biv = devm_kmalloc(ss->dev, AES_BLOCK_SIZE,
 						GFP_KERNEL | GFP_DMA);
-		if (!ss->flows[i].biv)
+		if (!ss->flows[i].biv) {
+			err = -ENOMEM;
 			goto error_engine;
+		}
 
 		for (j = 0; j < MAX_SG; j++) {
 			ss->flows[i].iv[j] = devm_kmalloc(ss->dev, AES_BLOCK_SIZE,
 							  GFP_KERNEL | GFP_DMA);
-			if (!ss->flows[i].iv[j])
+			if (!ss->flows[i].iv[j]) {
+				err = -ENOMEM;
 				goto error_engine;
+			}
 		}
 
 		/* the padding could be up to two block. */
 		ss->flows[i].pad = devm_kmalloc(ss->dev, MAX_PAD_SIZE,
 						GFP_KERNEL | GFP_DMA);
-		if (!ss->flows[i].pad)
+		if (!ss->flows[i].pad) {
+			err = -ENOMEM;
 			goto error_engine;
+		}
 		ss->flows[i].result = devm_kmalloc(ss->dev, SHA256_DIGEST_SIZE,
 						   GFP_KERNEL | GFP_DMA);
-		if (!ss->flows[i].result)
+		if (!ss->flows[i].result) {
+			err = -ENOMEM;
 			goto error_engine;
+		}
 
 		ss->flows[i].engine = crypto_engine_alloc_init(ss->dev, true);
 		if (!ss->flows[i].engine) {
diff --git a/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-hash.c b/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-hash.c
index ac417a6b39e5f..36a82b22953cd 100644
--- a/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-hash.c
+++ b/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-hash.c
@@ -30,8 +30,8 @@ static int sun8i_ss_hashkey(struct sun8i_ss_hash_tfm_ctx *tfmctx, const u8 *key,
 	int ret = 0;
 
 	xtfm = crypto_alloc_shash("sha1", 0, CRYPTO_ALG_NEED_FALLBACK);
-	if (!xtfm)
-		return -ENOMEM;
+	if (IS_ERR(xtfm))
+		return PTR_ERR(xtfm);
 
 	len = sizeof(*sdesc) + crypto_shash_descsize(xtfm);
 	sdesc = kmalloc(len, GFP_KERNEL);
@@ -586,7 +586,8 @@ retry:
 			rctx->t_dst[k + 1].len = rctx->t_dst[k].len;
 		}
 		addr_xpad = dma_map_single(ss->dev, tfmctx->ipad, bs, DMA_TO_DEVICE);
-		if (dma_mapping_error(ss->dev, addr_xpad)) {
+		err = dma_mapping_error(ss->dev, addr_xpad);
+		if (err) {
 			dev_err(ss->dev, "Fail to create DMA mapping of ipad\n");
 			goto err_dma_xpad;
 		}
@@ -612,7 +613,8 @@ retry:
 			goto err_dma_result;
 		}
 		addr_xpad = dma_map_single(ss->dev, tfmctx->opad, bs, DMA_TO_DEVICE);
-		if (dma_mapping_error(ss->dev, addr_xpad)) {
+		err = dma_mapping_error(ss->dev, addr_xpad);
+		if (err) {
 			dev_err(ss->dev, "Fail to create DMA mapping of opad\n");
 			goto err_dma_xpad;
 		}
diff --git a/drivers/crypto/atmel-ecc.c b/drivers/crypto/atmel-ecc.c
index 59a57279e77bf..a4b13d326cfc6 100644
--- a/drivers/crypto/atmel-ecc.c
+++ b/drivers/crypto/atmel-ecc.c
@@ -349,8 +349,16 @@ static int atmel_ecc_remove(struct i2c_client *client)
 
 	/* Return EBUSY if i2c client already allocated. */
 	if (atomic_read(&i2c_priv->tfm_count)) {
-		dev_err(&client->dev, "Device is busy\n");
-		return -EBUSY;
+		/*
+		 * After we return here, the memory backing the device is freed.
+		 * That happens no matter what the return value of this function
+		 * is because in the Linux device model there is no error
+		 * handling for unbinding a driver.
+		 * If there is still some action pending, it probably involves
+		 * accessing the freed memory.
+		 */
+		dev_emerg(&client->dev, "Device is busy, expect memory corruption.\n");
+		return 0;
 	}
 
 	crypto_unregister_kpp(&atmel_ecdh_nist_p256);
diff --git a/drivers/crypto/ccp/ccp-dev.h b/drivers/crypto/ccp/ccp-dev.h
index a5d9123a22ead..83350e2d9821e 100644
--- a/drivers/crypto/ccp/ccp-dev.h
+++ b/drivers/crypto/ccp/ccp-dev.h
@@ -366,7 +366,7 @@ struct ccp_device {
 
 	/* Master lists that all cmds are queued on. Because there can be
 	 * more than one CCP command queue that can process a cmd a separate
-	 * backlog list is neeeded so that the backlog completion call
+	 * backlog list is needed so that the backlog completion call
 	 * completes before the cmd is available for execution.
 	 */
 	spinlock_t cmd_lock ____cacheline_aligned;
diff --git a/drivers/crypto/ccp/sev-dev.c b/drivers/crypto/ccp/sev-dev.c
index 799b476fc3e82..9f588c9728f8b 100644
--- a/drivers/crypto/ccp/sev-dev.c
+++ b/drivers/crypto/ccp/sev-dev.c
@@ -503,7 +503,7 @@ static int __sev_platform_shutdown_locked(int *error)
 	struct sev_device *sev = psp_master->sev_data;
 	int ret;
 
-	if (sev->state == SEV_STATE_UNINIT)
+	if (!sev || sev->state == SEV_STATE_UNINIT)
 		return 0;
 
 	ret = __sev_do_cmd_locked(SEV_CMD_SHUTDOWN, NULL, error);
@@ -577,6 +577,8 @@ static int sev_ioctl_do_platform_status(struct sev_issue_cmd *argp)
 	struct sev_user_data_status data;
 	int ret;
 
+	memset(&data, 0, sizeof(data));
+
 	ret = __sev_do_cmd_locked(SEV_CMD_PLATFORM_STATUS, &data, &argp->error);
 	if (ret)
 		return ret;
@@ -630,7 +632,7 @@ static int sev_ioctl_do_pek_csr(struct sev_issue_cmd *argp, bool writable)
 	if (input.length > SEV_FW_BLOB_MAX_SIZE)
 		return -EFAULT;
 
-	blob = kmalloc(input.length, GFP_KERNEL);
+	blob = kzalloc(input.length, GFP_KERNEL);
 	if (!blob)
 		return -ENOMEM;
 
@@ -854,7 +856,7 @@ static int sev_ioctl_do_get_id2(struct sev_issue_cmd *argp)
 	input_address = (void __user *)input.address;
 
 	if (input.address && input.length) {
-		id_blob = kmalloc(input.length, GFP_KERNEL);
+		id_blob = kzalloc(input.length, GFP_KERNEL);
 		if (!id_blob)
 			return -ENOMEM;
 
@@ -973,14 +975,14 @@ static int sev_ioctl_do_pdh_export(struct sev_issue_cmd *argp, bool writable)
 	if (input.cert_chain_len > SEV_FW_BLOB_MAX_SIZE)
 		return -EFAULT;
 
-	pdh_blob = kmalloc(input.pdh_cert_len, GFP_KERNEL);
+	pdh_blob = kzalloc(input.pdh_cert_len, GFP_KERNEL);
 	if (!pdh_blob)
 		return -ENOMEM;
 
 	data.pdh_cert_address = __psp_pa(pdh_blob);
 	data.pdh_cert_len = input.pdh_cert_len;
 
-	cert_blob = kmalloc(input.cert_chain_len, GFP_KERNEL);
+	cert_blob = kzalloc(input.cert_chain_len, GFP_KERNEL);
 	if (!cert_blob) {
 		ret = -ENOMEM;
 		goto e_free_pdh;
diff --git a/drivers/crypto/hisilicon/qm.c b/drivers/crypto/hisilicon/qm.c
index b4ca2eb034d7d..ad83c194d6648 100644
--- a/drivers/crypto/hisilicon/qm.c
+++ b/drivers/crypto/hisilicon/qm.c
@@ -877,13 +877,6 @@ static void qm_pm_put_sync(struct hisi_qm *qm)
 	pm_runtime_put_autosuspend(dev);
 }
 
-static struct hisi_qp *qm_to_hisi_qp(struct hisi_qm *qm, struct qm_eqe *eqe)
-{
-	u16 cqn = le32_to_cpu(eqe->dw0) & QM_EQE_CQN_MASK;
-
-	return &qm->qp_array[cqn];
-}
-
 static void qm_cq_head_update(struct hisi_qp *qp)
 {
 	if (qp->qp_status.cq_head == QM_Q_DEPTH - 1) {
@@ -894,47 +887,37 @@ static void qm_cq_head_update(struct hisi_qp *qp)
 	}
 }
 
-static void qm_poll_qp(struct hisi_qp *qp, struct hisi_qm *qm)
+static void qm_poll_req_cb(struct hisi_qp *qp)
 {
-	if (unlikely(atomic_read(&qp->qp_status.flags) == QP_STOP))
-		return;
-
-	if (qp->event_cb) {
-		qp->event_cb(qp);
-		return;
-	}
-
-	if (qp->req_cb) {
-		struct qm_cqe *cqe = qp->cqe + qp->qp_status.cq_head;
-
-		while (QM_CQE_PHASE(cqe) == qp->qp_status.cqc_phase) {
-			dma_rmb();
-			qp->req_cb(qp, qp->sqe + qm->sqe_size *
-				   le16_to_cpu(cqe->sq_head));
-			qm_cq_head_update(qp);
-			cqe = qp->cqe + qp->qp_status.cq_head;
-			qm_db(qm, qp->qp_id, QM_DOORBELL_CMD_CQ,
-			      qp->qp_status.cq_head, 0);
-			atomic_dec(&qp->qp_status.used);
-		}
+	struct qm_cqe *cqe = qp->cqe + qp->qp_status.cq_head;
+	struct hisi_qm *qm = qp->qm;
 
-		/* set c_flag */
+	while (QM_CQE_PHASE(cqe) == qp->qp_status.cqc_phase) {
+		dma_rmb();
+		qp->req_cb(qp, qp->sqe + qm->sqe_size *
+			   le16_to_cpu(cqe->sq_head));
+		qm_cq_head_update(qp);
+		cqe = qp->cqe + qp->qp_status.cq_head;
 		qm_db(qm, qp->qp_id, QM_DOORBELL_CMD_CQ,
-		      qp->qp_status.cq_head, 1);
+		      qp->qp_status.cq_head, 0);
+		atomic_dec(&qp->qp_status.used);
 	}
+
+	/* set c_flag */
+	qm_db(qm, qp->qp_id, QM_DOORBELL_CMD_CQ, qp->qp_status.cq_head, 1);
 }
 
-static void qm_work_process(struct work_struct *work)
+static int qm_get_complete_eqe_num(struct hisi_qm_poll_data *poll_data)
 {
-	struct hisi_qm *qm = container_of(work, struct hisi_qm, work);
+	struct hisi_qm *qm = poll_data->qm;
 	struct qm_eqe *eqe = qm->eqe + qm->status.eq_head;
-	struct hisi_qp *qp;
 	int eqe_num = 0;
+	u16 cqn;
 
 	while (QM_EQE_PHASE(eqe) == qm->status.eqc_phase) {
+		cqn = le32_to_cpu(eqe->dw0) & QM_EQE_CQN_MASK;
+		poll_data->qp_finish_id[eqe_num] = cqn;
 		eqe_num++;
-		qp = qm_to_hisi_qp(qm, eqe);
-		qm_poll_qp(qp, qm);
 
 		if (qm->status.eq_head == QM_EQ_DEPTH - 1) {
 			qm->status.eqc_phase = !qm->status.eqc_phase;
@@ -945,37 +928,70 @@ static void qm_work_process(struct work_struct *work)
 			qm->status.eq_head++;
 		}
 
-		if (eqe_num == QM_EQ_DEPTH / 2 - 1) {
-			eqe_num = 0;
-			qm_db(qm, 0, QM_DOORBELL_CMD_EQ, qm->status.eq_head, 0);
-		}
+		if (eqe_num == (QM_EQ_DEPTH >> 1) - 1)
+			break;
 	}
 
 	qm_db(qm, 0, QM_DOORBELL_CMD_EQ, qm->status.eq_head, 0);
+
+	return eqe_num;
 }
 
-static irqreturn_t do_qm_irq(int irq, void *data)
+static void qm_work_process(struct work_struct *work)
 {
-	struct hisi_qm *qm = (struct hisi_qm *)data;
+	struct hisi_qm_poll_data *poll_data =
+		container_of(work, struct hisi_qm_poll_data, work);
+	struct hisi_qm *qm = poll_data->qm;
+	struct hisi_qp *qp;
+	int eqe_num, i;
 
-	/* the workqueue created by device driver of QM */
-	if (qm->wq)
-		queue_work(qm->wq, &qm->work);
-	else
-		schedule_work(&qm->work);
+	/* Get qp id of completed tasks and re-enable the interrupt. */
+	eqe_num = qm_get_complete_eqe_num(poll_data);
+	for (i = eqe_num - 1; i >= 0; i--) {
+		qp = &qm->qp_array[poll_data->qp_finish_id[i]];
+		if (unlikely(atomic_read(&qp->qp_status.flags) == QP_STOP))
+			continue;
 
-	return IRQ_HANDLED;
+		if (qp->event_cb) {
+			qp->event_cb(qp);
+			continue;
+		}
+
+		if (likely(qp->req_cb))
+			qm_poll_req_cb(qp);
+	}
+}
+
+static bool do_qm_irq(struct hisi_qm *qm)
+{
+	struct qm_eqe *eqe = qm->eqe + qm->status.eq_head;
+	struct hisi_qm_poll_data *poll_data;
+	u16 cqn;
+
+	if (!readl(qm->io_base + QM_VF_EQ_INT_SOURCE))
+		return false;
+
+	if (QM_EQE_PHASE(eqe) == qm->status.eqc_phase) {
+		cqn = le32_to_cpu(eqe->dw0) & QM_EQE_CQN_MASK;
+		poll_data = &qm->poll_data[cqn];
+		queue_work(qm->wq, &poll_data->work);
+
+		return true;
+	}
+
+	return false;
 }
 
 static irqreturn_t qm_irq(int irq, void *data)
 {
 	struct hisi_qm *qm = data;
+	bool ret;
 
-	if (readl(qm->io_base + QM_VF_EQ_INT_SOURCE))
-		return do_qm_irq(irq, data);
+	ret = do_qm_irq(qm);
+	if (ret)
+		return IRQ_HANDLED;
 
 	atomic64_inc(&qm->debug.dfx.err_irq_cnt);
-	dev_err(&qm->pdev->dev, "invalid int source\n");
 	qm_db(qm, 0, QM_DOORBELL_CMD_EQ, qm->status.eq_head, 0);
 
 	return IRQ_NONE;
@@ -3134,11 +3150,8 @@ static int qm_stop_qp_nolock(struct hisi_qp *qp)
 	if (ret)
 		dev_err(dev, "Failed to drain out data for stopping!\n");
 
-	if (qp->qm->wq)
-		flush_workqueue(qp->qm->wq);
-	else
-		flush_work(&qp->qm->work);
 
+	flush_workqueue(qp->qm->wq);
 	if (unlikely(qp->is_resetting && atomic_read(&qp->qp_status.used)))
 		qp_stop_fail_cb(qp);
 
@@ -3557,8 +3570,10 @@ static void hisi_qp_memory_uninit(struct hisi_qm *qm, int num)
 	for (i = num - 1; i >= 0; i--) {
 		qdma = &qm->qp_array[i].qdma;
 		dma_free_coherent(dev, qdma->size, qdma->va, qdma->dma);
+		kfree(qm->poll_data[i].qp_finish_id);
 	}
 
+	kfree(qm->poll_data);
 	kfree(qm->qp_array);
 }
 
@@ -3567,12 +3582,18 @@ static int hisi_qp_memory_init(struct hisi_qm *qm, size_t dma_size, int id)
 	struct device *dev = &qm->pdev->dev;
 	size_t off = qm->sqe_size * QM_Q_DEPTH;
 	struct hisi_qp *qp;
+	int ret = -ENOMEM;
+
+	qm->poll_data[id].qp_finish_id = kcalloc(qm->qp_num, sizeof(u16),
+						 GFP_KERNEL);
+	if (!qm->poll_data[id].qp_finish_id)
+		return -ENOMEM;
 
 	qp = &qm->qp_array[id];
 	qp->qdma.va = dma_alloc_coherent(dev, dma_size, &qp->qdma.dma,
 					 GFP_KERNEL);
 	if (!qp->qdma.va)
-		return -ENOMEM;
+		goto err_free_qp_finish_id;
 
 	qp->sqe = qp->qdma.va;
 	qp->sqe_dma = qp->qdma.dma;
@@ -3583,6 +3604,10 @@ static int hisi_qp_memory_init(struct hisi_qm *qm, size_t dma_size, int id)
 	qp->qp_id = id;
 
 	return 0;
+
+err_free_qp_finish_id:
+	kfree(qm->poll_data[id].qp_finish_id);
+	return ret;
 }
 
 static void hisi_qm_pre_init(struct hisi_qm *qm)
@@ -3672,6 +3697,26 @@ static void qm_last_regs_uninit(struct hisi_qm *qm)
 	debug->qm_last_words = NULL;
 }
 
+static void hisi_qm_unint_work(struct hisi_qm *qm)
+{
+	destroy_workqueue(qm->wq); /* wq is allocated in hisi_qm_init_work() */
+}
+
+static void hisi_qm_memory_uninit(struct hisi_qm *qm)
+{
+	struct device *dev = &qm->pdev->dev;
+
+	hisi_qp_memory_uninit(qm, qm->qp_num); /* per-qp DMA, poll_data, qp_array */
+	if (qm->qdma.va) {	/* only when the qm DMA buffer exists */
+		hisi_qm_cache_wb(qm);
+		dma_free_coherent(dev, qm->qdma.size,
+				  qm->qdma.va, qm->qdma.dma);
+	}
+
+	idr_destroy(&qm->qp_idr);
+	kfree(qm->factor);
+}
+
 /**
  * hisi_qm_uninit() - Uninitialize qm.
  * @qm: The qm needed uninit.
@@ -3680,13 +3725,10 @@ static void qm_last_regs_uninit(struct hisi_qm *qm)
  */
 void hisi_qm_uninit(struct hisi_qm *qm)
 {
-	struct pci_dev *pdev = qm->pdev;
-	struct device *dev = &pdev->dev;
-
 	qm_last_regs_uninit(qm);
 
 	qm_cmd_uninit(qm);
-	kfree(qm->factor);
+	hisi_qm_unint_work(qm);
 	down_write(&qm->qps_lock);
 
 	if (!qm_avail_state(qm, QM_CLOSE)) {
@@ -3694,14 +3736,7 @@ void hisi_qm_uninit(struct hisi_qm *qm)
 		return;
 	}
 
-	hisi_qp_memory_uninit(qm, qm->qp_num);
-	idr_destroy(&qm->qp_idr);
-
-	if (qm->qdma.va) {
-		hisi_qm_cache_wb(qm);
-		dma_free_coherent(dev, qm->qdma.size,
-				  qm->qdma.va, qm->qdma.dma);
-	}
+	hisi_qm_memory_uninit(qm);
 	hisi_qm_set_state(qm, QM_NOT_READY);
 	up_write(&qm->qps_lock);
 
@@ -6018,14 +6053,28 @@ err_disable_pcidev:
 	return ret;
 }
 
-static void hisi_qm_init_work(struct hisi_qm *qm)
+static int hisi_qm_init_work(struct hisi_qm *qm)
 {
-	INIT_WORK(&qm->work, qm_work_process);
+	int i;
+
+	for (i = 0; i < qm->qp_num; i++)
+		INIT_WORK(&qm->poll_data[i].work, qm_work_process);
+
 	if (qm->fun_type == QM_HW_PF)
 		INIT_WORK(&qm->rst_work, hisi_qm_controller_reset);
 
 	if (qm->ver > QM_HW_V2)
 		INIT_WORK(&qm->cmd_process, qm_cmd_process);
+
+	qm->wq = alloc_workqueue("%s", WQ_HIGHPRI | WQ_MEM_RECLAIM |
+				 WQ_UNBOUND, num_online_cpus(),
+				 pci_name(qm->pdev));
+	if (!qm->wq) {
+		pci_err(qm->pdev, "failed to alloc workqueue!\n");
+		return -ENOMEM;
+	}
+
+	return 0;
 }
 
 static int hisi_qp_alloc_memory(struct hisi_qm *qm)
@@ -6038,11 +6087,18 @@ static int hisi_qp_alloc_memory(struct hisi_qm *qm)
 	if (!qm->qp_array)
 		return -ENOMEM;
 
+	qm->poll_data = kcalloc(qm->qp_num, sizeof(struct hisi_qm_poll_data), GFP_KERNEL);
+	if (!qm->poll_data) {
+		kfree(qm->qp_array);
+		return -ENOMEM;
+	}
+
 	/* one more page for device or qp statuses */
 	qp_dma_size = qm->sqe_size * QM_Q_DEPTH +
 		      sizeof(struct qm_cqe) * QM_Q_DEPTH;
 	qp_dma_size = PAGE_ALIGN(qp_dma_size) + PAGE_SIZE;
 	for (i = 0; i < qm->qp_num; i++) {
+		qm->poll_data[i].qm = qm;
 		ret = hisi_qp_memory_init(qm, qp_dma_size, i);
 		if (ret)
 			goto err_init_qp_mem;
@@ -6176,7 +6232,10 @@ int hisi_qm_init(struct hisi_qm *qm)
 	if (ret)
 		goto err_alloc_uacce;
 
-	hisi_qm_init_work(qm);
+	ret = hisi_qm_init_work(qm);
+	if (ret)
+		goto err_free_qm_memory;
+
 	qm_cmd_init(qm);
 	atomic_set(&qm->status.flags, QM_INIT);
 
@@ -6184,6 +6243,8 @@ int hisi_qm_init(struct hisi_qm *qm)
 
 	return 0;
 
+err_free_qm_memory:
+	hisi_qm_memory_uninit(qm);
 err_alloc_uacce:
 	if (qm->use_sva) {
 		uacce_remove(qm->uacce);
diff --git a/drivers/crypto/hisilicon/sec2/sec.h b/drivers/crypto/hisilicon/sec2/sec.h
index c2e9b01187a74..42bb486f3b6d5 100644
--- a/drivers/crypto/hisilicon/sec2/sec.h
+++ b/drivers/crypto/hisilicon/sec2/sec.h
@@ -143,10 +143,10 @@ struct sec_ctx {
 	/* Threshold for fake busy, trigger to return -EBUSY to user */
 	u32 fake_req_limit;
 
-	/* Currrent cyclic index to select a queue for encipher */
+	/* Current cyclic index to select a queue for encipher */
 	atomic_t enc_qcyclic;
 
-	 /* Currrent cyclic index to select a queue for decipher */
+	 /* Current cyclic index to select a queue for decipher */
 	atomic_t dec_qcyclic;
 
 	enum sec_alg_type alg_type;
diff --git a/drivers/crypto/hisilicon/sec2/sec_main.c b/drivers/crypto/hisilicon/sec2/sec_main.c
index 4d85d2cbf3767..2c0be91c0b094 100644
--- a/drivers/crypto/hisilicon/sec2/sec_main.c
+++ b/drivers/crypto/hisilicon/sec2/sec_main.c
@@ -508,16 +508,17 @@ static int sec_engine_init(struct hisi_qm *qm)
 
 	writel(SEC_SAA_ENABLE, qm->io_base + SEC_SAA_EN_REG);
 
-	/* HW V2 enable sm4 extra mode, as ctr/ecb */
-	if (qm->ver < QM_HW_V3)
+	if (qm->ver < QM_HW_V3) {
+		/* HW V2 enable sm4 extra mode, as ctr/ecb */
 		writel_relaxed(SEC_BD_ERR_CHK_EN0,
 			       qm->io_base + SEC_BD_ERR_CHK_EN_REG0);
 
-	/* Enable sm4 xts mode multiple iv */
-	writel_relaxed(SEC_BD_ERR_CHK_EN1,
-		       qm->io_base + SEC_BD_ERR_CHK_EN_REG1);
-	writel_relaxed(SEC_BD_ERR_CHK_EN3,
-		       qm->io_base + SEC_BD_ERR_CHK_EN_REG3);
+		/* HW V2 enable sm4 xts mode multiple iv */
+		writel_relaxed(SEC_BD_ERR_CHK_EN1,
+			       qm->io_base + SEC_BD_ERR_CHK_EN_REG1);
+		writel_relaxed(SEC_BD_ERR_CHK_EN3,
+			       qm->io_base + SEC_BD_ERR_CHK_EN_REG3);
+	}
 
 	/* config endian */
 	sec_set_endian(qm);
@@ -1002,8 +1003,6 @@ static int sec_pf_probe_init(struct sec_dev *sec)
 
 static int sec_qm_init(struct hisi_qm *qm, struct pci_dev *pdev)
 {
-	int ret;
-
 	qm->pdev = pdev;
 	qm->ver = pdev->revision;
 	qm->algs = "cipher\ndigest\naead";
@@ -1029,25 +1028,7 @@ static int sec_qm_init(struct hisi_qm *qm, struct pci_dev *pdev)
 		qm->qp_num = SEC_QUEUE_NUM_V1 - SEC_PF_DEF_Q_NUM;
 	}
 
-	/*
-	 * WQ_HIGHPRI: SEC request must be low delayed,
-	 * so need a high priority workqueue.
-	 * WQ_UNBOUND: SEC task is likely with long
-	 * running CPU intensive workloads.
-	 */
-	qm->wq = alloc_workqueue("%s", WQ_HIGHPRI | WQ_MEM_RECLAIM |
-				 WQ_UNBOUND, num_online_cpus(),
-				 pci_name(qm->pdev));
-	if (!qm->wq) {
-		pci_err(qm->pdev, "fail to alloc workqueue\n");
-		return -ENOMEM;
-	}
-
-	ret = hisi_qm_init(qm);
-	if (ret)
-		destroy_workqueue(qm->wq);
-
-	return ret;
+	return hisi_qm_init(qm);
 }
 
 static void sec_qm_uninit(struct hisi_qm *qm)
@@ -1078,8 +1059,6 @@ static int sec_probe_init(struct sec_dev *sec)
 static void sec_probe_uninit(struct hisi_qm *qm)
 {
 	hisi_qm_dev_err_uninit(qm);
-
-	destroy_workqueue(qm->wq);
 }
 
 static void sec_iommu_used_check(struct sec_dev *sec)
diff --git a/drivers/crypto/hisilicon/trng/trng.c b/drivers/crypto/hisilicon/trng/trng.c
index 829f2caf0f67f..97e500db0a825 100644
--- a/drivers/crypto/hisilicon/trng/trng.c
+++ b/drivers/crypto/hisilicon/trng/trng.c
@@ -185,7 +185,7 @@ static int hisi_trng_read(struct hwrng *rng, void *buf, size_t max, bool wait)
 	struct hisi_trng *trng;
 	int currsize = 0;
 	u32 val = 0;
-	u32 ret;
+	int ret;
 
 	trng = container_of(rng, struct hisi_trng, rng);
 
diff --git a/drivers/crypto/hisilicon/zip/zip_main.c b/drivers/crypto/hisilicon/zip/zip_main.c
index 9c925e9c0a2d1..c3303d99acac7 100644
--- a/drivers/crypto/hisilicon/zip/zip_main.c
+++ b/drivers/crypto/hisilicon/zip/zip_main.c
@@ -990,8 +990,6 @@ static int hisi_zip_pf_probe_init(struct hisi_zip *hisi_zip)
 
 static int hisi_zip_qm_init(struct hisi_qm *qm, struct pci_dev *pdev)
 {
-	int ret;
-
 	qm->pdev = pdev;
 	qm->ver = pdev->revision;
 	if (pdev->revision >= QM_HW_V3)
@@ -1021,25 +1019,12 @@ static int hisi_zip_qm_init(struct hisi_qm *qm, struct pci_dev *pdev)
 		qm->qp_num = HZIP_QUEUE_NUM_V1 - HZIP_PF_DEF_Q_NUM;
 	}
 
-	qm->wq = alloc_workqueue("%s", WQ_HIGHPRI | WQ_MEM_RECLAIM |
-				 WQ_UNBOUND, num_online_cpus(),
-				 pci_name(qm->pdev));
-	if (!qm->wq) {
-		pci_err(qm->pdev, "fail to alloc workqueue\n");
-		return -ENOMEM;
-	}
-
-	ret = hisi_qm_init(qm);
-	if (ret)
-		destroy_workqueue(qm->wq);
-
-	return ret;
+	return hisi_qm_init(qm);
 }
 
 static void hisi_zip_qm_uninit(struct hisi_qm *qm)
 {
 	hisi_qm_uninit(qm);
-	destroy_workqueue(qm->wq);
 }
 
 static int hisi_zip_probe_init(struct hisi_zip *hisi_zip)
diff --git a/drivers/crypto/marvell/octeontx2/otx2_cpt_devlink.c b/drivers/crypto/marvell/octeontx2/otx2_cpt_devlink.c
index bb02e0db36153..7503f6b18ac56 100644
--- a/drivers/crypto/marvell/octeontx2/otx2_cpt_devlink.c
+++ b/drivers/crypto/marvell/octeontx2/otx2_cpt_devlink.c
@@ -51,11 +51,47 @@ static const struct devlink_param otx2_cpt_dl_params[] = {
 			     NULL),
 };
 
-static int otx2_cpt_devlink_info_get(struct devlink *devlink,
+static int otx2_cpt_dl_info_firmware_version_put(struct devlink_info_req *req,
+						 struct otx2_cpt_eng_grp_info grp[],
+						 const char *ver_name, int eng_type)
+{
+	struct otx2_cpt_engs_rsvd *eng;
+	int i;
+
+	for (i = 0; i < OTX2_CPT_MAX_ENGINE_GROUPS; i++) {
+		eng = find_engines_by_type(&grp[i], eng_type);
+		if (eng)	/* first group with engines of eng_type is reported */
+			return devlink_info_version_running_put(req, ver_name,
+								eng->ucode->ver_str);
+	}
+
+	return 0;	/* no group has engines of eng_type: nothing is reported */
+}
+
+static int otx2_cpt_devlink_info_get(struct devlink *dl,
 				     struct devlink_info_req *req,
 				     struct netlink_ext_ack *extack)
 {
-	return devlink_info_driver_name_put(req, "rvu_cptpf");
+	struct otx2_cpt_devlink *cpt_dl = devlink_priv(dl);
+	struct otx2_cptpf_dev *cptpf = cpt_dl->cptpf;
+	int err;
+
+	err = devlink_info_driver_name_put(req, "rvu_cptpf");
+	if (err)
+		return err;
+
+	err = otx2_cpt_dl_info_firmware_version_put(req, cptpf->eng_grps.grp,
+						    "fw.ae", OTX2_CPT_AE_TYPES);
+	if (err)
+		return err;
+
+	err = otx2_cpt_dl_info_firmware_version_put(req, cptpf->eng_grps.grp,
+						    "fw.se", OTX2_CPT_SE_TYPES);
+	if (err)
+		return err;
+
+	return otx2_cpt_dl_info_firmware_version_put(req, cptpf->eng_grps.grp,
+						    "fw.ie", OTX2_CPT_IE_TYPES);
 }
 
 static const struct devlink_ops otx2_cpt_devlink_ops = {
diff --git a/drivers/crypto/marvell/octeontx2/otx2_cptpf_ucode.c b/drivers/crypto/marvell/octeontx2/otx2_cptpf_ucode.c
index 9cba2f714c7e1..f10050fead164 100644
--- a/drivers/crypto/marvell/octeontx2/otx2_cptpf_ucode.c
+++ b/drivers/crypto/marvell/octeontx2/otx2_cptpf_ucode.c
@@ -476,7 +476,7 @@ release_fw:
 	return ret;
 }
 
-static struct otx2_cpt_engs_rsvd *find_engines_by_type(
+struct otx2_cpt_engs_rsvd *find_engines_by_type(
 					struct otx2_cpt_eng_grp_info *eng_grp,
 					int eng_type)
 {
@@ -1605,7 +1605,10 @@ int otx2_cpt_dl_custom_egrp_create(struct otx2_cptpf_dev *cptpf,
 		if (!strncasecmp(val, "se", 2) && strchr(val, ':')) {
 			if (has_se || ucode_idx)
 				goto err_print;
-			tmp = strim(strsep(&val, ":"));
+			tmp = strsep(&val, ":");
+			if (!tmp)
+				goto err_print;
+			tmp = strim(tmp);
 			if (!val)
 				goto err_print;
 			if (strlen(tmp) != 2)
@@ -1617,7 +1620,10 @@ int otx2_cpt_dl_custom_egrp_create(struct otx2_cptpf_dev *cptpf,
 		} else if (!strncasecmp(val, "ae", 2) && strchr(val, ':')) {
 			if (has_ae || ucode_idx)
 				goto err_print;
-			tmp = strim(strsep(&val, ":"));
+			tmp = strsep(&val, ":");
+			if (!tmp)
+				goto err_print;
+			tmp = strim(tmp);
 			if (!val)
 				goto err_print;
 			if (strlen(tmp) != 2)
@@ -1629,7 +1635,10 @@ int otx2_cpt_dl_custom_egrp_create(struct otx2_cptpf_dev *cptpf,
 		} else if (!strncasecmp(val, "ie", 2) && strchr(val, ':')) {
 			if (has_ie || ucode_idx)
 				goto err_print;
-			tmp = strim(strsep(&val, ":"));
+			tmp = strsep(&val, ":");
+			if (!tmp)
+				goto err_print;
+			tmp = strim(tmp);
 			if (!val)
 				goto err_print;
 			if (strlen(tmp) != 2)
diff --git a/drivers/crypto/marvell/octeontx2/otx2_cptpf_ucode.h b/drivers/crypto/marvell/octeontx2/otx2_cptpf_ucode.h
index 8f4d4e5f531a6..e69320a54b5d5 100644
--- a/drivers/crypto/marvell/octeontx2/otx2_cptpf_ucode.h
+++ b/drivers/crypto/marvell/octeontx2/otx2_cptpf_ucode.h
@@ -166,4 +166,7 @@ int otx2_cpt_dl_custom_egrp_create(struct otx2_cptpf_dev *cptpf,
 int otx2_cpt_dl_custom_egrp_delete(struct otx2_cptpf_dev *cptpf,
 				   struct devlink_param_gset_ctx *ctx);
 void otx2_cpt_print_uc_dbg_info(struct otx2_cptpf_dev *cptpf);
+struct otx2_cpt_engs_rsvd *find_engines_by_type(
+					struct otx2_cpt_eng_grp_info *eng_grp,
+					int eng_type);
 #endif /* __OTX2_CPTPF_UCODE_H */
diff --git a/drivers/crypto/qat/Kconfig b/drivers/crypto/qat/Kconfig
index 4b90c0f22b03f..1220cc86f9100 100644
--- a/drivers/crypto/qat/Kconfig
+++ b/drivers/crypto/qat/Kconfig
@@ -17,7 +17,7 @@ config CRYPTO_DEV_QAT
 
 config CRYPTO_DEV_QAT_DH895xCC
 	tristate "Support for Intel(R) DH895xCC"
-	depends on X86 && PCI
+	depends on PCI && (!CPU_BIG_ENDIAN || COMPILE_TEST)
 	select CRYPTO_DEV_QAT
 	help
 	  Support for Intel(R) DH895xcc with Intel(R) QuickAssist Technology
@@ -28,7 +28,7 @@ config CRYPTO_DEV_QAT_DH895xCC
 
 config CRYPTO_DEV_QAT_C3XXX
 	tristate "Support for Intel(R) C3XXX"
-	depends on X86 && PCI
+	depends on PCI && (!CPU_BIG_ENDIAN || COMPILE_TEST)
 	select CRYPTO_DEV_QAT
 	help
 	  Support for Intel(R) C3xxx with Intel(R) QuickAssist Technology
@@ -39,7 +39,7 @@ config CRYPTO_DEV_QAT_C3XXX
 
 config CRYPTO_DEV_QAT_C62X
 	tristate "Support for Intel(R) C62X"
-	depends on X86 && PCI
+	depends on PCI && (!CPU_BIG_ENDIAN || COMPILE_TEST)
 	select CRYPTO_DEV_QAT
 	help
 	  Support for Intel(R) C62x with Intel(R) QuickAssist Technology
@@ -50,7 +50,7 @@ config CRYPTO_DEV_QAT_C62X
 
 config CRYPTO_DEV_QAT_4XXX
 	tristate "Support for Intel(R) QAT_4XXX"
-	depends on X86 && PCI
+	depends on PCI && (!CPU_BIG_ENDIAN || COMPILE_TEST)
 	select CRYPTO_DEV_QAT
 	help
 	  Support for Intel(R) QuickAssist Technology QAT_4xxx
@@ -61,7 +61,7 @@ config CRYPTO_DEV_QAT_4XXX
 
 config CRYPTO_DEV_QAT_DH895xCCVF
 	tristate "Support for Intel(R) DH895xCC Virtual Function"
-	depends on X86 && PCI
+	depends on PCI && (!CPU_BIG_ENDIAN || COMPILE_TEST)
 	select PCI_IOV
 	select CRYPTO_DEV_QAT
 
@@ -74,7 +74,7 @@ config CRYPTO_DEV_QAT_DH895xCCVF
 
 config CRYPTO_DEV_QAT_C3XXXVF
 	tristate "Support for Intel(R) C3XXX Virtual Function"
-	depends on X86 && PCI
+	depends on PCI && (!CPU_BIG_ENDIAN || COMPILE_TEST)
 	select PCI_IOV
 	select CRYPTO_DEV_QAT
 	help
@@ -86,7 +86,7 @@ config CRYPTO_DEV_QAT_C3XXXVF
 
 config CRYPTO_DEV_QAT_C62XVF
 	tristate "Support for Intel(R) C62X Virtual Function"
-	depends on X86 && PCI
+	depends on PCI && (!CPU_BIG_ENDIAN || COMPILE_TEST)
 	select PCI_IOV
 	select CRYPTO_DEV_QAT
 	help
diff --git a/drivers/crypto/qat/qat_common/adf_common_drv.h b/drivers/crypto/qat/qat_common/adf_common_drv.h
index 0464fa2579295..b364bc06c732a 100644
--- a/drivers/crypto/qat/qat_common/adf_common_drv.h
+++ b/drivers/crypto/qat/qat_common/adf_common_drv.h
@@ -49,11 +49,6 @@ struct service_hndl {
 	struct list_head list;
 };
 
-static inline int get_current_node(void)
-{
-	return topology_physical_package_id(raw_smp_processor_id());
-}
-
 int adf_service_register(struct service_hndl *service);
 int adf_service_unregister(struct service_hndl *service);
 
diff --git a/drivers/crypto/qat/qat_common/qat_algs.c b/drivers/crypto/qat/qat_common/qat_algs.c
index 148edbe379e31..fb45fa83841c5 100644
--- a/drivers/crypto/qat/qat_common/qat_algs.c
+++ b/drivers/crypto/qat/qat_common/qat_algs.c
@@ -605,7 +605,7 @@ static int qat_alg_aead_newkey(struct crypto_aead *tfm, const u8 *key,
 {
 	struct qat_alg_aead_ctx *ctx = crypto_aead_ctx(tfm);
 	struct qat_crypto_instance *inst = NULL;
-	int node = get_current_node();
+	int node = numa_node_id();
 	struct device *dev;
 	int ret;
 
@@ -1065,7 +1065,7 @@ static int qat_alg_skcipher_newkey(struct qat_alg_skcipher_ctx *ctx,
 {
 	struct qat_crypto_instance *inst = NULL;
 	struct device *dev;
-	int node = get_current_node();
+	int node = numa_node_id();
 	int ret;
 
 	inst = qat_crypto_get_instance_node(node);
diff --git a/drivers/crypto/qat/qat_common/qat_asym_algs.c b/drivers/crypto/qat/qat_common/qat_asym_algs.c
index 16d97db9ea15f..095ed2a404d2f 100644
--- a/drivers/crypto/qat/qat_common/qat_asym_algs.c
+++ b/drivers/crypto/qat/qat_common/qat_asym_algs.c
@@ -489,7 +489,7 @@ static int qat_dh_init_tfm(struct crypto_kpp *tfm)
 {
 	struct qat_dh_ctx *ctx = kpp_tfm_ctx(tfm);
 	struct qat_crypto_instance *inst =
-			qat_crypto_get_instance_node(get_current_node());
+			qat_crypto_get_instance_node(numa_node_id());
 
 	if (!inst)
 		return -EINVAL;
@@ -1225,7 +1225,7 @@ static int qat_rsa_init_tfm(struct crypto_akcipher *tfm)
 {
 	struct qat_rsa_ctx *ctx = akcipher_tfm_ctx(tfm);
 	struct qat_crypto_instance *inst =
-			qat_crypto_get_instance_node(get_current_node());
+			qat_crypto_get_instance_node(numa_node_id());
 
 	if (!inst)
 		return -EINVAL;
diff --git a/fs/crypto/fscrypt_private.h b/fs/crypto/fscrypt_private.h
index 11fe9d213ae14..3afdaa0847736 100644
--- a/fs/crypto/fscrypt_private.h
+++ b/fs/crypto/fscrypt_private.h
@@ -31,7 +31,7 @@
 #define FSCRYPT_CONTEXT_V2	2
 
 /* Keep this in sync with include/uapi/linux/fscrypt.h */
-#define FSCRYPT_MODE_MAX	FSCRYPT_MODE_ADIANTUM
+#define FSCRYPT_MODE_MAX	FSCRYPT_MODE_AES_256_HCTR2
 
 struct fscrypt_context_v1 {
 	u8 version; /* FSCRYPT_CONTEXT_V1 */
diff --git a/fs/crypto/keysetup.c b/fs/crypto/keysetup.c
index c35711896bd4f..fbc71abdabe32 100644
--- a/fs/crypto/keysetup.c
+++ b/fs/crypto/keysetup.c
@@ -53,6 +53,13 @@ struct fscrypt_mode fscrypt_modes[] = {
 		.ivsize = 32,
 		.blk_crypto_mode = BLK_ENCRYPTION_MODE_ADIANTUM,
 	},
+	[FSCRYPT_MODE_AES_256_HCTR2] = {
+		.friendly_name = "AES-256-HCTR2",
+		.cipher_str = "hctr2(aes)",
+		.keysize = 32,
+		.security_strength = 32,
+		.ivsize = 32,
+	},
 };
 
 static DEFINE_MUTEX(fscrypt_mode_key_setup_mutex);
diff --git a/fs/crypto/policy.c b/fs/crypto/policy.c
index a450189565e32..80b8ca0f340b2 100644
--- a/fs/crypto/policy.c
+++ b/fs/crypto/policy.c
@@ -61,7 +61,7 @@ fscrypt_get_dummy_policy(struct super_block *sb)
 	return sb->s_cop->get_dummy_policy(sb);
 }
 
-static bool fscrypt_valid_enc_modes(u32 contents_mode, u32 filenames_mode)
+static bool fscrypt_valid_enc_modes_v1(u32 contents_mode, u32 filenames_mode)
 {
 	if (contents_mode == FSCRYPT_MODE_AES_256_XTS &&
 	    filenames_mode == FSCRYPT_MODE_AES_256_CTS)
@@ -78,6 +78,14 @@ static bool fscrypt_valid_enc_modes(u32 contents_mode, u32 filenames_mode)
 	return false;
 }
 
+static bool fscrypt_valid_enc_modes_v2(u32 contents_mode, u32 filenames_mode)
+{
+	if (contents_mode == FSCRYPT_MODE_AES_256_XTS &&
+	    filenames_mode == FSCRYPT_MODE_AES_256_HCTR2)
+		return true;
+	return fscrypt_valid_enc_modes_v1(contents_mode, filenames_mode);
+}
+
 static bool supported_direct_key_modes(const struct inode *inode,
 				       u32 contents_mode, u32 filenames_mode)
 {
@@ -151,7 +159,7 @@ static bool supported_iv_ino_lblk_policy(const struct fscrypt_policy_v2 *policy,
 static bool fscrypt_supported_v1_policy(const struct fscrypt_policy_v1 *policy,
 					const struct inode *inode)
 {
-	if (!fscrypt_valid_enc_modes(policy->contents_encryption_mode,
+	if (!fscrypt_valid_enc_modes_v1(policy->contents_encryption_mode,
 				     policy->filenames_encryption_mode)) {
 		fscrypt_warn(inode,
 			     "Unsupported encryption modes (contents %d, filenames %d)",
@@ -187,7 +195,7 @@ static bool fscrypt_supported_v2_policy(const struct fscrypt_policy_v2 *policy,
 {
 	int count = 0;
 
-	if (!fscrypt_valid_enc_modes(policy->contents_encryption_mode,
+	if (!fscrypt_valid_enc_modes_v2(policy->contents_encryption_mode,
 				     policy->filenames_encryption_mode)) {
 		fscrypt_warn(inode,
 			     "Unsupported encryption modes (contents %d, filenames %d)",
diff --git a/include/crypto/internal/blake2s.h b/include/crypto/internal/blake2s.h
index 52363eee2b20e..506d56530ca93 100644
--- a/include/crypto/internal/blake2s.h
+++ b/include/crypto/internal/blake2s.h
@@ -8,7 +8,6 @@
 #define _CRYPTO_INTERNAL_BLAKE2S_H
 
 #include <crypto/blake2s.h>
-#include <crypto/internal/hash.h>
 #include <linux/string.h>
 
 void blake2s_compress_generic(struct blake2s_state *state, const u8 *block,
@@ -19,111 +18,4 @@ void blake2s_compress(struct blake2s_state *state, const u8 *block,
 
 bool blake2s_selftest(void);
 
-static inline void blake2s_set_lastblock(struct blake2s_state *state)
-{
-	state->f[0] = -1;
-}
-
-/* Helper functions for BLAKE2s shared by the library and shash APIs */
-
-static __always_inline void
-__blake2s_update(struct blake2s_state *state, const u8 *in, size_t inlen,
-		 bool force_generic)
-{
-	const size_t fill = BLAKE2S_BLOCK_SIZE - state->buflen;
-
-	if (unlikely(!inlen))
-		return;
-	if (inlen > fill) {
-		memcpy(state->buf + state->buflen, in, fill);
-		if (force_generic)
-			blake2s_compress_generic(state, state->buf, 1,
-						 BLAKE2S_BLOCK_SIZE);
-		else
-			blake2s_compress(state, state->buf, 1,
-					 BLAKE2S_BLOCK_SIZE);
-		state->buflen = 0;
-		in += fill;
-		inlen -= fill;
-	}
-	if (inlen > BLAKE2S_BLOCK_SIZE) {
-		const size_t nblocks = DIV_ROUND_UP(inlen, BLAKE2S_BLOCK_SIZE);
-		/* Hash one less (full) block than strictly possible */
-		if (force_generic)
-			blake2s_compress_generic(state, in, nblocks - 1,
-						 BLAKE2S_BLOCK_SIZE);
-		else
-			blake2s_compress(state, in, nblocks - 1,
-					 BLAKE2S_BLOCK_SIZE);
-		in += BLAKE2S_BLOCK_SIZE * (nblocks - 1);
-		inlen -= BLAKE2S_BLOCK_SIZE * (nblocks - 1);
-	}
-	memcpy(state->buf + state->buflen, in, inlen);
-	state->buflen += inlen;
-}
-
-static __always_inline void
-__blake2s_final(struct blake2s_state *state, u8 *out, bool force_generic)
-{
-	blake2s_set_lastblock(state);
-	memset(state->buf + state->buflen, 0,
-	       BLAKE2S_BLOCK_SIZE - state->buflen); /* Padding */
-	if (force_generic)
-		blake2s_compress_generic(state, state->buf, 1, state->buflen);
-	else
-		blake2s_compress(state, state->buf, 1, state->buflen);
-	cpu_to_le32_array(state->h, ARRAY_SIZE(state->h));
-	memcpy(out, state->h, state->outlen);
-}
-
-/* Helper functions for shash implementations of BLAKE2s */
-
-struct blake2s_tfm_ctx {
-	u8 key[BLAKE2S_KEY_SIZE];
-	unsigned int keylen;
-};
-
-static inline int crypto_blake2s_setkey(struct crypto_shash *tfm,
-					const u8 *key, unsigned int keylen)
-{
-	struct blake2s_tfm_ctx *tctx = crypto_shash_ctx(tfm);
-
-	if (keylen == 0 || keylen > BLAKE2S_KEY_SIZE)
-		return -EINVAL;
-
-	memcpy(tctx->key, key, keylen);
-	tctx->keylen = keylen;
-
-	return 0;
-}
-
-static inline int crypto_blake2s_init(struct shash_desc *desc)
-{
-	const struct blake2s_tfm_ctx *tctx = crypto_shash_ctx(desc->tfm);
-	struct blake2s_state *state = shash_desc_ctx(desc);
-	unsigned int outlen = crypto_shash_digestsize(desc->tfm);
-
-	__blake2s_init(state, outlen, tctx->key, tctx->keylen);
-	return 0;
-}
-
-static inline int crypto_blake2s_update(struct shash_desc *desc,
-					const u8 *in, unsigned int inlen,
-					bool force_generic)
-{
-	struct blake2s_state *state = shash_desc_ctx(desc);
-
-	__blake2s_update(state, in, inlen, force_generic);
-	return 0;
-}
-
-static inline int crypto_blake2s_final(struct shash_desc *desc, u8 *out,
-				       bool force_generic)
-{
-	struct blake2s_state *state = shash_desc_ctx(desc);
-
-	__blake2s_final(state, out, force_generic);
-	return 0;
-}
-
 #endif /* _CRYPTO_INTERNAL_BLAKE2S_H */
diff --git a/include/crypto/polyval.h b/include/crypto/polyval.h
new file mode 100644
index 0000000000000..1d630f371f777
--- /dev/null
+++ b/include/crypto/polyval.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Common values for the Polyval hash algorithm
+ *
+ * Copyright 2021 Google LLC
+ */
+
+#ifndef _CRYPTO_POLYVAL_H
+#define _CRYPTO_POLYVAL_H
+
+#include <linux/types.h>
+#include <linux/crypto.h>
+
+#define POLYVAL_BLOCK_SIZE	16
+#define POLYVAL_DIGEST_SIZE	16
+
+void polyval_mul_non4k(u8 *op1, const u8 *op2);
+
+void polyval_update_non4k(const u8 *key, const u8 *in,
+			  size_t nblocks, u8 *accumulator);
+
+#endif
diff --git a/include/linux/hisi_acc_qm.h b/include/linux/hisi_acc_qm.h
index 6cabafffd0dda..116e8bd68c999 100644
--- a/include/linux/hisi_acc_qm.h
+++ b/include/linux/hisi_acc_qm.h
@@ -265,6 +265,12 @@ struct hisi_qm_list {
 	void (*unregister_from_crypto)(struct hisi_qm *qm);
 };
 
+struct hisi_qm_poll_data {
+	struct hisi_qm *qm;
+	struct work_struct work;
+	u16 *qp_finish_id;
+};
+
 struct hisi_qm {
 	enum qm_hw_ver ver;
 	enum qm_fun_type fun_type;
@@ -302,6 +308,7 @@ struct hisi_qm {
 	struct rw_semaphore qps_lock;
 	struct idr qp_idr;
 	struct hisi_qp *qp_array;
+	struct hisi_qm_poll_data *poll_data;
 
 	struct mutex mailbox_lock;
 
@@ -312,7 +319,6 @@ struct hisi_qm {
 	u32 error_mask;
 
 	struct workqueue_struct *wq;
-	struct work_struct work;
 	struct work_struct rst_work;
 	struct work_struct cmd_process;
 
diff --git a/include/uapi/linux/fscrypt.h b/include/uapi/linux/fscrypt.h
index 9f4428be3e362..a756b29afcc23 100644
--- a/include/uapi/linux/fscrypt.h
+++ b/include/uapi/linux/fscrypt.h
@@ -27,7 +27,8 @@
 #define FSCRYPT_MODE_AES_128_CBC		5
 #define FSCRYPT_MODE_AES_128_CTS		6
 #define FSCRYPT_MODE_ADIANTUM			9
-/* If adding a mode number > 9, update FSCRYPT_MODE_MAX in fscrypt_private.h */
+#define FSCRYPT_MODE_AES_256_HCTR2		10
+/* If adding a mode number > 10, update FSCRYPT_MODE_MAX in fscrypt_private.h */
 
 /*
  * Legacy policy version; ad-hoc KDF and no key verification.
diff --git a/lib/crypto/blake2s-selftest.c b/lib/crypto/blake2s-selftest.c
index 409e4b7287704..66f505220f43f 100644
--- a/lib/crypto/blake2s-selftest.c
+++ b/lib/crypto/blake2s-selftest.c
@@ -4,6 +4,8 @@
  */
 
 #include <crypto/internal/blake2s.h>
+#include <linux/kernel.h>
+#include <linux/random.h>
 #include <linux/string.h>
 
 /*
@@ -587,5 +589,44 @@ bool __init blake2s_selftest(void)
 		}
 	}
 
+	for (i = 0; i < 32; ++i) {
+		enum { TEST_ALIGNMENT = 16 };
+		u8 unaligned_block[BLAKE2S_BLOCK_SIZE + TEST_ALIGNMENT - 1]
+					__aligned(TEST_ALIGNMENT);
+		u8 blocks[BLAKE2S_BLOCK_SIZE * 3];
+		struct blake2s_state state1, state2;
+
+		get_random_bytes(blocks, sizeof(blocks));
+		get_random_bytes(&state, sizeof(state));
+
+#if defined(CONFIG_CRYPTO_LIB_BLAKE2S_GENERIC) && \
+    defined(CONFIG_CRYPTO_ARCH_HAVE_LIB_BLAKE2S)
+		memcpy(&state1, &state, sizeof(state1));
+		memcpy(&state2, &state, sizeof(state2));
+		blake2s_compress(&state1, blocks, 3, BLAKE2S_BLOCK_SIZE);
+		blake2s_compress_generic(&state2, blocks, 3, BLAKE2S_BLOCK_SIZE);
+		if (memcmp(&state1, &state2, sizeof(state1))) {
+			pr_err("blake2s random compress self-test %d: FAIL\n",
+			       i + 1);
+			success = false;
+		}
+#endif
+
+		memcpy(&state1, &state, sizeof(state1));
+		blake2s_compress(&state1, blocks, 1, BLAKE2S_BLOCK_SIZE);
+		for (l = 1; l < TEST_ALIGNMENT; ++l) {
+			memcpy(unaligned_block + l, blocks,
+			       BLAKE2S_BLOCK_SIZE);
+			memcpy(&state2, &state, sizeof(state2));
+			blake2s_compress(&state2, unaligned_block + l, 1,
+					 BLAKE2S_BLOCK_SIZE);
+			if (memcmp(&state1, &state2, sizeof(state1))) {
+				pr_err("blake2s random compress align %d self-test %d: FAIL\n",
+				       l, i + 1);
+				success = false;
+			}
+		}
+	}
+
 	return success;
 }
diff --git a/lib/crypto/blake2s.c b/lib/crypto/blake2s.c
index c71c09621c09c..98e688c6d8910 100644
--- a/lib/crypto/blake2s.c
+++ b/lib/crypto/blake2s.c
@@ -16,16 +16,44 @@
 #include <linux/init.h>
 #include <linux/bug.h>
 
+static inline void blake2s_set_lastblock(struct blake2s_state *state)
+{
+	state->f[0] = -1;
+}
+
 void blake2s_update(struct blake2s_state *state, const u8 *in, size_t inlen)
 {
-	__blake2s_update(state, in, inlen, false);
+	const size_t fill = BLAKE2S_BLOCK_SIZE - state->buflen;
+
+	if (unlikely(!inlen))
+		return;
+	if (inlen > fill) {
+		memcpy(state->buf + state->buflen, in, fill);
+		blake2s_compress(state, state->buf, 1, BLAKE2S_BLOCK_SIZE);
+		state->buflen = 0;
+		in += fill;
+		inlen -= fill;
+	}
+	if (inlen > BLAKE2S_BLOCK_SIZE) {
+		const size_t nblocks = DIV_ROUND_UP(inlen, BLAKE2S_BLOCK_SIZE);
+		blake2s_compress(state, in, nblocks - 1, BLAKE2S_BLOCK_SIZE);
+		in += BLAKE2S_BLOCK_SIZE * (nblocks - 1);
+		inlen -= BLAKE2S_BLOCK_SIZE * (nblocks - 1);
+	}
+	memcpy(state->buf + state->buflen, in, inlen);
+	state->buflen += inlen;
 }
 EXPORT_SYMBOL(blake2s_update);
 
 void blake2s_final(struct blake2s_state *state, u8 *out)
 {
 	WARN_ON(IS_ENABLED(DEBUG) && !out);
-	__blake2s_final(state, out, false);
+	blake2s_set_lastblock(state);
+	memset(state->buf + state->buflen, 0,
+	       BLAKE2S_BLOCK_SIZE - state->buflen); /* Padding */
+	blake2s_compress(state, state->buf, 1, state->buflen);
+	cpu_to_le32_array(state->h, ARRAY_SIZE(state->h));
+	memcpy(out, state->h, state->outlen);
 	memzero_explicit(state, sizeof(*state));
 }
 EXPORT_SYMBOL(blake2s_final);
@@ -38,12 +66,7 @@ static int __init blake2s_mod_init(void)
 	return 0;
 }
 
-static void __exit blake2s_mod_exit(void)
-{
-}
-
 module_init(blake2s_mod_init);
-module_exit(blake2s_mod_exit);
 MODULE_LICENSE("GPL v2");
 MODULE_DESCRIPTION("BLAKE2s hash function");
 MODULE_AUTHOR("Jason A. Donenfeld <Jason@zx2c4.com>");
diff --git a/lib/mpi/mpi-add.c b/lib/mpi/mpi-add.c
index 2cdae54c1bd01..9056fc5167fc8 100644
--- a/lib/mpi/mpi-add.c
+++ b/lib/mpi/mpi-add.c
@@ -138,7 +138,7 @@ void mpi_sub(MPI w, MPI u, MPI v)
 	mpi_add(w, u, vv);
 	mpi_free(vv);
 }
-
+EXPORT_SYMBOL_GPL(mpi_sub);
 
 void mpi_addm(MPI w, MPI u, MPI v, MPI m)
 {
diff --git a/lib/mpi/mpi-mul.c b/lib/mpi/mpi-mul.c
index 8f5fa200f2971..7f4eda8560dc9 100644
--- a/lib/mpi/mpi-mul.c
+++ b/lib/mpi/mpi-mul.c
@@ -82,6 +82,7 @@ void mpi_mul(MPI w, MPI u, MPI v)
 	if (tmp_limb)
 		mpi_free_limb_space(tmp_limb);
 }
+EXPORT_SYMBOL_GPL(mpi_mul);
 
 void mpi_mulm(MPI w, MPI u, MPI v, MPI m)
 {
author	Stephen Rothwell <sfr@canb.auug.org.au>	2022-06-28 11:26:37 +1000
committer	Stephen Rothwell <sfr@canb.auug.org.au>	2022-06-28 11:26:37 +1000
commit	ad9dd1674d6bee8c27f02f6c61e5328cbc0bfb64 (patch)
tree	e05ac3b8b9c8de9c85f2711c45199a000ea0c26f
parent	09239bdc1e3c1626871ffc34250be5332c3c0ef8 (diff)
parent	9c846c5d2d4e63d75b2cb172625087cadadbe065 (diff)
download	linux-next-ad9dd1674d6bee8c27f02f6c61e5328cbc0bfb64.tar.gz