This commit is contained in:
jude 2023-04-20 10:13:03 +01:00
parent f8b0608ab3
commit f9805c4806
5 changed files with 160 additions and 66 deletions

View File

@ -1,4 +1,9 @@
import { cryptoRandom, generate_prime, KEY_SIZE } from "./random_primes.js"; import {
cryptoRandom,
generate_prime,
generate_safe_prime,
KEY_SIZE,
} from "./random_primes.js";
import { gcd, mod_exp } from "./math.js"; import { gcd, mod_exp } from "./math.js";
const PAILLIER = 0; const PAILLIER = 0;
@ -371,25 +376,8 @@ export function generate_keypair() {
p = BigInt(window.sessionStorage.getItem("p")); p = BigInt(window.sessionStorage.getItem("p"));
q = BigInt(window.sessionStorage.getItem("q")); q = BigInt(window.sessionStorage.getItem("q"));
} else { } else {
let p1 = generate_prime(); p = generate_safe_prime();
while (p1 % 4n !== 3n) { q = generate_safe_prime();
p1 = generate_prime();
}
let primes = [p1];
while (
check_gcd(primes.slice(0, primes.length - 1), primes[primes.length - 1]) ===
null
) {
q = generate_prime();
while (q % 4n !== 3n) {
q = generate_prime();
}
primes.push(q);
}
p = check_gcd(primes.slice(0, primes.length - 1), primes[primes.length - 1]);
} }
window.sessionStorage.setItem("p", p); window.sessionStorage.setItem("p", p);

View File

@ -97,6 +97,15 @@ export function generate_prime() {
} }
} }
export function generate_safe_prime() {
while (true) {
let n = generate_prime();
if (small_prime_test((n - 1n) / 2n) && miller_rabin((n - 1n) / 2n, 40)) {
return n;
}
}
}
const SMALL_PRIMES = [ const SMALL_PRIMES = [
2n, 2n,
3n, 3n,

View File

@ -27,7 +27,7 @@
\usepackage{natbib} % Bibliography and citations \usepackage{natbib} % Bibliography and citations
\newcommand*{\urlprefix}{Available from: }% in the Harvard-Bath style \newcommand*{\urlprefix}{Available from: }% in the Harvard-Bath style
\newcommand*{\urldateprefix}{Accessed } % \newcommand*{\urldateprefix}{Accessed } %
\bibliographystyle{abbrv} % \bibliographystyle{plain} %
\pagestyle{headings} \pagestyle{headings}

Binary file not shown.

View File

@ -8,7 +8,6 @@
% requirements spec, clear evaluation of security with links to proofs etc. % requirements spec, clear evaluation of security with links to proofs etc.
% clear achievements of content in regards to goals % clear achievements of content in regards to goals
% reflect on achievemtns, difficulties, novelty etc. % reflect on achievemtns, difficulties, novelty etc.
% references as nouns
% diagrams maybe % diagrams maybe
\usepackage{Bath-CS-Dissertation} \usepackage{Bath-CS-Dissertation}
@ -17,6 +16,7 @@
\usepackage{amsthm} \usepackage{amsthm}
\usepackage{tikz} \usepackage{tikz}
\usepackage{minted} \usepackage{minted}
\usepackage{lscape}
\usepackage{multirow} \usepackage{multirow}
\usepackage{tabularx} \usepackage{tabularx}
\usepackage{booktabs} \usepackage{booktabs}
@ -114,7 +114,7 @@ Bit commitment schemes provide a mechanism for one party to commit to some hidde
\subsubsection{Commutative cryptography} \subsubsection{Commutative cryptography}
\cite{Shamir1981} provides a protocol using bit commitment to play poker. They offer a bit commitment scheme using commutative encryption algorithms based on modular arithmetic. This scheme works by each player encrypting cards, and decrypting in a different order as to obscure the value of the actual cards until all players have decrypted. Protocols exist that utilise bit commitment to play poker \cite{Shamir1981}. They offer a bit commitment scheme using commutative encryption algorithms based on modular arithmetic. This scheme works by each player encrypting cards, and decrypting in a different order as to obscure the value of the actual cards until all players have decrypted.
However, almost all well-documented encryption schemes are not commutative. One alternative is to use some well-known one-way function, such as SHA, with randomly generated salts. However, almost all well-documented encryption schemes are not commutative. One alternative is to use some well-known one-way function, such as SHA, with randomly generated salts.
@ -128,7 +128,7 @@ Bit commitment schemes can also be implemented using one-way functions: \begin{e
\item The second party computes $c' = H(m, r)$ and validates that $c = c'$. \item The second party computes $c' = H(m, r)$ and validates that $c = c'$.
\end{enumerate} \end{enumerate}
\cite{blum1983coin} provides a protocol for flipping fair coins "across a telephone", which is isomorphic to selecting a random value from a set of two values. This cannot be simply repeated though to generate numbers in the range of 1-6, as 6 is not a power of 2. Protocols exist for flipping fair coins "across a telephone", which is isomorphic to selecting a random value from a set of two values \cite{blum1983coin}. This cannot be simply repeated though to generate numbers in the range of 1-6, as 6 is not a power of 2.
However, a similar protocol can be used where each player commits to a single value $x \in \mathbb{Z}_6$. As the distribution of outcomes of addition in the group $\mathbb{Z}_n$ is fair, we can then sum the values of $x$ committed to by both players to deduce a final value for the roll. To decrease the amount of communications required for rolling a number of dice, a vector of values can be used. However, a similar protocol can be used where each player commits to a single value $x \in \mathbb{Z}_6$. As the distribution of outcomes of addition in the group $\mathbb{Z}_n$ is fair, we can then sum the values of $x$ committed to by both players to deduce a final value for the roll. To decrease the amount of communications required for rolling a number of dice, a vector of values can be used.
@ -149,10 +149,11 @@ Zero-knowledge proofs are particularly applicable to the presented problem. They
\item The proof presented can only be trusted by the verifier, and not by other parties. \item The proof presented can only be trusted by the verifier, and not by other parties.
\end{itemize} \end{itemize}
We can further formalise the general description of a zero-knowledge proof. \cite{mohr2007survey} provides a common formalisation of the concept of a zero-knowledge proof system for a language $L$ by stating that \begin{itemize} We can further formalise the general description of a zero-knowledge proof. The common formalisation of the concept of a zero-knowledge proof system for a language $L$ is
\begin{itemize}
\item For every $x \in L$, the verifier will accept $x$ following interaction with a prover. \item For every $x \in L$, the verifier will accept $x$ following interaction with a prover.
\item For some polynomial $p$ and any $x \notin S$, the verifier will reject $x$ with probability at least $\frac{1}{p(|x|)}$. \item For some polynomial $p$ and any $x \notin S$, the verifier will reject $x$ with probability at least $\frac{1}{p(|x|)}$.
\item A verifier can produce a simulator $S$ such that for all $x \in L$, the outputs of $S(x)$ are indistinguishable from a transcript of the proving steps taken with the prover on $x$. \item A verifier can produce a simulator $S$ such that for all $x \in L$, the outputs of $S(x)$ are indistinguishable from a transcript of the proving steps taken with the prover on $x$.
\end{itemize} \end{itemize}
The final point describes a proof as being \textit{computationally zero-knowledge}. Some stronger conditions exist, which describe the distributions of the outputs of the simulator versus the distributions of the outputs of interaction with the prover. \begin{itemize} The final point describes a proof as being \textit{computationally zero-knowledge}. Some stronger conditions exist, which describe the distributions of the outputs of the simulator versus the distributions of the outputs of interaction with the prover. \begin{itemize}
@ -187,7 +188,7 @@ The goal is then to identify ways to secure this protocol by obscuring the edges
\subsubsection{Graphs \& zero-knowledge proofs} \subsubsection{Graphs \& zero-knowledge proofs}
\cite{10.1145/116825.116852} identifies methods to construct zero-knowledge proofs for two graphs being isomorphic or non-isomorphic. A typical example for zero-knowledge proofs is graph isomorphism \cite{10.1145/116825.116852}.
Identifying Risk as a graph therefore enables us to construct isomorphisms as part of the proof protocol. For example, when a player wishes to commit to a movement, it is important to prove that the initial node and the new node are adjacent. This can be proven by communicating isomorphic graphs, and constructing challenges based on the edges of the original graph. Identifying Risk as a graph therefore enables us to construct isomorphisms as part of the proof protocol. For example, when a player wishes to commit to a movement, it is important to prove that the initial node and the new node are adjacent. This can be proven by communicating isomorphic graphs, and constructing challenges based on the edges of the original graph.
@ -223,9 +224,35 @@ A similar issue appears in the proposed system: a cheating player could update t
\subsubsection{Additive homomorphic cryptosystems} \subsubsection{Additive homomorphic cryptosystems}
Some cryptosystems admit an additive homomorphic property: that is, given the public key and two encrypted values $\sigma_1 = E(m_1), \sigma_2 = E(m_2)$, the value $\sigma_1 + \sigma_2 = E(m_1 + m_2)$ is the ciphertext of the underlying operation. Some cryptosystems admit an additive homomorphic property: that is, given the public key and two encrypted values $\sigma_1 = E(m_1), \sigma_2 = E(m_2)$, the value $\sigma_1 + \sigma_2 = E(m_1 + m_2)$ is the ciphertext of the underlying operation.
\cite{paillier1999public} defined a cryptosystem based on composite residuosity classes, which expresses this property. \cite[Section~5.2]{damgard2003} demonstrates an honest-verifier zero-knowledge proof for proving a given value is 0. Hence, clearly, proving a summation $a + b = v$ can be performed by proving $v - a - b = 0$ in an additive homomorphic cryptosystem. The Paillier cryptosystem, which is based on composite residuosity classes express the additive homomorphic property \cite{paillier1999public}. This is due to the structure of ciphertexts in the Paillier cryptosystem. A public key is of structure $(n, g)$, where $n$ is the product of two large primes and $g$ is a generator of $\mathbb{Z}^*_n$. Under the public key, the encryption $c$ of a message $m$ is computed as \begin{align*}
c = g^mr^n \mod n^2
\end{align*}
for some random $r \in \mathbb{Z}^*_{n^2}$.
The Paillier cryptosystem has disadvantages in its time and space complexity compared to other public-key cryptosystems such as RSA. In space complexity, Paillier ciphertexts are twice the size of their corresponding plaintext, as for a modulus $n$, ciphertexts are computed modulo $n^2$ for a message in range up to $n$. This cost can be reduced by employing some form of compression on the resulting ciphertexts.
The main concern is the issue of time complexity of Paillier. Theoretic results based on the number of multiplications performed indicate that Paillier can be 1,000 times slower than RSA encryption. Many optimisations have been presented of the Paillier cryptosystem.
The first is in the selection of public parameter $g$. The original paper suggests a choice of $g = 2$, however the choice of $g = 1 + n$ is very common, as the exponentiation $g^m = 1 + mn$ by binomial theorem.
Another optimisation is that of Jurik \cite[Section~2.3.1]{Jurik2003ExtensionsTT}: Jurik proposes that the public-key is instead $(n, g, h)$, where $h$ is the generator of the group $\mathbb{Z}^*_{n}[+1]$ (the group of units with Jacobi symbol $+1$). Then, an encryption $c'$ of a message $m$ is computed as \begin{align*}
c' = g^m (h^r \mod n)^n\mod n^2
\end{align*}
for some random $r \in \mathbb{Z}^*_{n}$.
The optimisation comes in two parts: firstly, the mantissa is smaller, resulting in faster multiplications. Secondly, by taking $h_n = h^n \mod n^2$, we find the following equivalence: \begin{align*}
(h^r \mod n)^n \mod n^2 = h_n^r \mod n^2
\end{align*}
Exponentials of the fixed base $h_n$ can then be pre-computed to accelerate exponentiation by arbitrary $r$. For example, one might pre-compute $h_n^{2^0} \mod n, h_n^{2^1} \mod n, h_n^{2^2} \mod n, \dots, h_n^{2^n} \mod n$. This reduces exponentiation to up to $n$ multiplications.
Jurik states that the optimised form can lead to a theoretic four times speedup over Paillier's original form.
\subsubsection{Zero-knowledge proofs in Paillier cryptosystem}
There exist honest-verifier zero-knowledge proofs for proving a given value is 0 \cite[Section~5.2]{damgard2003}. Hence, clearly, proving a summation $a + b = v$ can be performed by proving $v - a - b = 0$ in an additive homomorphic cryptosystem.
So, using some such scheme to obscure edge weights should enable verification of the edge values without revealing their actual values. So, using some such scheme to obscure edge weights should enable verification of the edge values without revealing their actual values.
@ -260,6 +287,53 @@ Despite this approach being centralised, it does emulate a fully peer-to-peer en
In particular, the final point allows for the use of purely JSON messages, which are readily parsed and processed by the client-side JavaScript. In particular, the final point allows for the use of purely JSON messages, which are readily parsed and processed by the client-side JavaScript.
\begin{landscape}
\begin{tikzpicture}[every node/.style={anchor=north west, outer sep=0.1cm}]
% Create outlines
\node[
rectangle,
dashed,
draw,
minimum width=0.5\hsize-4pt,
minimum height=0.5\textheight-4pt,
align=right
] at (0, 0.5\textheight+4pt) {};
\node[anchor=south west] at (0, 0) {Setup};
\node[
rectangle,
dashed,
draw,
minimum width=0.5\hsize-4pt,
minimum height=0.5\textheight-4pt,
align=right
] at (0, 0) {};
\node[anchor=south west] at (0, -0.5\textheight-4pt) {Pre-game};
\node[
rectangle,
dashed,
draw,
minimum width=0.5\hsize-4pt,
minimum height=\textheight,
align=right,
] at (0.5\hsize+4pt, 0.5\textheight+4pt) {};
% This node doesnt position correctly......
\node[anchor=south west] at (0.5\hsize+4pt, -0.5\textheight-2pt) {Game};
% Player connect handling
\node[draw=blue!50,rectangle,very thick,rounded corners=0.1mm] (Connect) at (8pt, 0.5\textheight-4pt) {Player connects};
% Player disconnect handling
\node[draw=blue!50,rectangle,very thick,rounded corners=0.1mm] (Connect) at (8pt, 0.5\textheight-4pt) {Player connects};
\end{tikzpicture}
\end{landscape}
\subsection{Message structure} \subsection{Message structure}
Messages are given a fixed structure to make processing simpler. Each JSON message holds an \texttt{author} field, being the sender's ID; a message ID to prevent replay attacks and associate related messages; and an \texttt{action}, which at a high level dictates how each client should process the message. Messages are given a fixed structure to make processing simpler. Each JSON message holds an \texttt{author} field, being the sender's ID; a message ID to prevent replay attacks and associate related messages; and an \texttt{action}, which at a high level dictates how each client should process the message.
@ -272,15 +346,15 @@ Players trust RSA keys on a trust-on-first-use (TOFU) basis. TOFU is the same pr
\subsection{Paillier cryptosystem} \subsection{Paillier cryptosystem}
Similar to RSA, Paillier requires the calculation of two large primes for the generation of public and private key pairs. ECMAScript typically stores integers as floating point numbers, giving precision up to $2^{53}$. This is clearly inappropriate for the generation of sufficiently large primes. ECMAScript typically stores integers as floating point numbers, giving precision up to $2^{53}$. This is clearly inappropriate for the generation of sufficiently large primes for the Paillier cryptosystem.
In 2020, ECMAScript introduced \texttt{BigInt} \cite{tc39}, which are, as described in the spec, "arbitrary precision integers". Whilst this does not hold true in common ECMAScript implementations (such as Chrome's V8), these "big integers" still provide sufficient precision for the Paillier cryptosystem, given some optimisations and specialisations are made with regards to the Paillier algorithm and in particular the modular exponentiation operation. In 2020, ECMAScript introduced \texttt{BigInt}, which are, as described in the spec, "arbitrary precision integers" \cite{tc39}. Whilst this does not hold true in common ECMAScript implementations (such as Chrome's V8), these "big integers" still provide sufficient precision for the Paillier cryptosystem.
It must be noted that \texttt{BigInt} is inappropriate for cryptography in practice, due to the possibility of timing attacks as operations are not necessarily constant time \cite{tc39}. In particular, modular exponentiation is non-constant time, and operates frequently on secret data. A savvy attacker may be able to use this to leak information about an adversary's private key; however, as decryption is not performed, this risk is considerably reduced as there is less need to perform optimisations based on Chinese remainder theorem which would require treating the modulus $n$ as its two components $p$ and $q$. It must be noted that \texttt{BigInt} is inappropriate for cryptography in practice, due to the possibility of timing attacks as operations are not necessarily constant time \cite{tc39}. In particular, modular exponentiation is non-constant time, and operates frequently on secret data. A savvy attacker may be able to use this to leak information about an adversary's private key; however, as decryption is not performed, this risk is considerably reduced as there is less need to perform optimisations based on Chinese remainder theorem which would require treating the modulus $n$ as its two components $p$ and $q$.
\subsection{Modular exponentiation} \subsection{Modular exponentiation}
As \texttt{BigInt}'s V8 implementation does not optimise modular exponentiation, we employ the use of addition chaining, as described in \cite{schneier_1996}. Addition chaining breaks a modular exponentiation into repeated square-and-modulo operations, which are computationally inexpensive to perform. As \texttt{BigInt}'s V8 implementation does not optimise modular exponentiation itself, we employ the use of addition chaining \cite{schneier_1996}. Addition chaining breaks a modular exponentiation into repeated square-and-modulo operations, which are less expensive to perform.
The number of operations is dependent primarily on the size of the exponent. For an exponent of bit length $L$, somewhere between $L$ and $2L$ multiply-and-modulo operations are performed, which gives overall a logarithmic time complexity supposing bit-shifts and multiply-and-modulo are constant time operations. The number of operations is dependent primarily on the size of the exponent. For an exponent of bit length $L$, somewhere between $L$ and $2L$ multiply-and-modulo operations are performed, which gives overall a logarithmic time complexity supposing bit-shifts and multiply-and-modulo are constant time operations.
@ -290,7 +364,7 @@ Generating primes is a basic application of the Rabin-Miller primality test \cit
\subsection{Public key} \subsection{Public key}
In the Paillier cryptosystem, the public key is a pair $(n, g)$ where $n = pq$ for primes $p, q$ satisfying $\gcd(pq, (p - 1)(q - 1)) = 1$ and $g \in \mathbb{Z}^*_{n^2}$. We restrict the range of plaintexts $m$ to $m < n$. In the Paillier cryptosystem, the public key is a pair $(n, g)$ where $n = pq$ for primes $p, q$ satisfying $\gcd(pq, (p - 1)(q - 1)) = 1$ and $g \in \mathbb{Z}^*_{n^2}$. The range of plaintexts $m$ is restricted to $0 < m < n$.
The Paillier cryptosystem is otherwise generic over the choice of primes $p, q$. However, by choosing $p, q$ of equal length, the required property of $pq$ and $(p - 1)(q - 1)$ being coprime is guaranteed. The Paillier cryptosystem is otherwise generic over the choice of primes $p, q$. However, by choosing $p, q$ of equal length, the required property of $pq$ and $(p - 1)(q - 1)$ being coprime is guaranteed.
@ -302,7 +376,7 @@ The Paillier cryptosystem is otherwise generic over the choice of primes $p, q$.
Without loss of generality, assume $p > q$. Suppose $\gcd(pq, (p - 1)(q - 1)) \neq 1$. Then, $q \mid p - 1$. However, the bit-lengths of $p, q$ are identical. So $\frac{1}{2}(p - 1) < q$. This is a contradiction to $q \mid p - 1$ (as 2 is the smallest possible divisor), and so we must have $\gcd(pq, (p - 1)(q - 1)) = 1$ as required. Without loss of generality, assume $p > q$. Suppose $\gcd(pq, (p - 1)(q - 1)) \neq 1$. Then, $q \mid p - 1$. However, the bit-lengths of $p, q$ are identical. So $\frac{1}{2}(p - 1) < q$. This is a contradiction to $q \mid p - 1$ (as 2 is the smallest possible divisor), and so we must have $\gcd(pq, (p - 1)(q - 1)) = 1$ as required.
\end{proof} \end{proof}
As the prime generation routine generates primes of equal length, this property is therefore guaranteed. The next optimisation is to select the public parameter $g = 1 + n$. As the prime generation routine generates primes of equal length, this property is therefore guaranteed. The next step is to select the public parameter $g$ as $g = 1 + n$.
\begin{proposition} \begin{proposition}
$1 + n \in \mathbb{Z}^*_{n^2}$. $1 + n \in \mathbb{Z}^*_{n^2}$.
@ -312,8 +386,40 @@ As the prime generation routine generates primes of equal length, this property
We see that $(1 + n)^n \equiv 1 \mod n^2$ from binomial expansion. So $1 + n$ is invertible as required. We see that $(1 + n)^n \equiv 1 \mod n^2$ from binomial expansion. So $1 + n$ is invertible as required.
\end{proof} \end{proof}
The selection of such $g$ is ideal, as the binomial expansion property reduces the number of calculations needed during modular exponentiation. Clearly, from the same result, $g^m = 1 + mn$. This operation is far easier to perform than a general modular exponentiation, as it fits within the valid range of the \texttt{BigInt} type, so no intermediary steps are needed to perform the computation. Besides reducing the number of operations to perform exponentiation, exponentiation also does not require auxiliary memory to store intermediary values used in the calculation.
In Jurik's form, we also need to compute $h$, a generator of the Jacobi subgroup, and impose restrictions on $p, q$. In particular, it is required that $p \equiv q \equiv 3 \mod 4$, $\gcd(p-1, q-1) = 2$, and that $p-1, q-1$ consist of large factors except for 2. One method to guarantee this is to use safe primes, which are primes of form $2p+1$ for $p$ prime.
\begin{proposition}
Safe primes are of form $p \equiv 3 \mod 4$
\end{proposition}
\begin{proof}
Let $q$ prime and $p = 2q+1$ the corresponding safe prime. Then, \begin{align*}
q \equiv 1 \mod 4 &\implies 2q+1 \equiv 3 \mod 4 \\
q \equiv 3 \mod 4 &\implies 2q+1 \equiv 3 \mod 4
\end{align*}
as required.
\end{proof}
\begin{proposition}
Safe primes $p \neq q$ satisfy $\gcd(p - 1, q - 1) = 2$
\end{proposition}
\begin{proof}
As $p, q$ are safe, $\frac{p - 1}{2}$ and $\frac{q - 1}{2}$ are prime. So \begin{align*}
\gcd\left(\frac{p - 1}{2}, \frac{q - 1}{2}\right) = 1 \implies \gcd(p - 1, q - 1) = 2
\end{align*}
\end{proof}
To identify safe primes, first we generate a prime $p$, and then test the primality of $\frac{p - 1}{2}$.
Finally, to get the public parameter $h$, we compute $h = -x^2 \mod n$ for some random $x \in \mathbb{Z}_n^*$. With high likelihood $x$ is coprime to $n$, and so the Jacobi symbol is computed as \begin{align*}
\left(\frac{-x^2}{n}\right) = \left(\frac{-x^2}{p}\right)\left(\frac{-x^2}{q}\right) = (-1)^2 = 1
\end{align*}
This gives us our public key $(n, g, h)$.
%todo got up to here
\subsection{Encryption} \subsection{Encryption}
In the original Paillier scheme, ciphertexts are computed as $c = g^m r^n \mod n^2$ for $r < n$ some random secret value. To make this easier to compute, we can compute the equivalent value $c = (r^n \mod n^2) \cdot (g^m \mod n^2) \mod n^2$. In the original Paillier scheme, ciphertexts are computed as $c = g^m r^n \mod n^2$ for $r < n$ some random secret value. To make this easier to compute, we can compute the equivalent value $c = (r^n \mod n^2) \cdot (g^m \mod n^2) \mod n^2$.
@ -405,7 +511,7 @@ As this protocol must run many times during a game, we consider each operation o
\subsection{Proof system} \subsection{Proof system}
The first proof to discuss is that of \cite[Section~5.2]{damgard2003}, in which the authors give an honest-verifier protocol to prove knowledge that a ciphertext is an encryption of zero. The first proof to discuss is the honest-verifier protocol to prove knowledge that a ciphertext is an encryption of zero \cite[Section~5.2]{damgard2003}.
The proof system presented is a Schnorr-style interactive proof for a given ciphertext $c$ being an encryption of zero. The proof system presented is a Schnorr-style interactive proof for a given ciphertext $c$ being an encryption of zero.
@ -511,7 +617,6 @@ Additionally, we can consider this protocol perfect zero-knowledge.
In the random oracle model, \hyperref[protocol1]{Protocol~\ref*{protocol1}} is perfect zero-knowledge. In the random oracle model, \hyperref[protocol1]{Protocol~\ref*{protocol1}} is perfect zero-knowledge.
\end{proposition} \end{proposition}
% todo this is not correct: the verifiers are not honest
\begin{proof} \begin{proof}
To prove perfect zero-knowledge, we require a polynomial-time algorithm $T^*$ such that for all verifiers and for all valid sets $S$, the set of transcripts $T(P, V, S) = T^*(S)$, and the distributions are identical. To prove perfect zero-knowledge, we require a polynomial-time algorithm $T^*$ such that for all verifiers and for all valid sets $S$, the set of transcripts $T(P, V, S) = T^*(S)$, and the distributions are identical.
@ -520,17 +625,19 @@ Additionally, we can consider this protocol perfect zero-knowledge.
\item Choose random $(r_i^*)'$ from the random oracle. \item Choose random $(r_i^*)'$ from the random oracle.
\item Encrypt under $P$'s public-key. \item Encrypt under $P$'s public-key.
\item Verifier picks $c$ as before. \item Verifier picks $c$ as before.
\item Perform proofs of zero, which are also perfect zero-knowledge from \cite{damgard2003}. \item Perform proofs of zero, which are also perfect zero-knowledge \cite{damgard2003}.
\end{enumerate} \end{enumerate}
This gives $T^*$ such that $T^*(S) = T(P, V, S)$, and the output distributions are identical. Hence, this proof is perfect zero-knowledge under random oracle model. This gives $T^*$ such that $T^*(S) = T(P, V, S)$, and the output distributions are identical. Hence, this proof is perfect zero-knowledge under random oracle model.
\end{proof} \end{proof}
In reality, as we are using Jurik's form of Paillier, the best we can hope for is computational zero-knowledge, as Jurik's form relies upon the computational indistinguishability of the sequence generated by powers of $h$ to random powers.
\subsection{Optimising} \subsection{Optimising}
It is preferred that these proofs can be performed with only a few communications: this issue is particularly prevalent here as this protocol requires multiple rounds to complete. The independence of each round on the next is a beneficial property, as it means the proof can be performed in parallel, so the prover transmits \textit{all} of their $\psi$'s, then the verifier transmits all of their challenges. However, still is the issue of performing proofs of zero. It is preferred that these proofs can be performed with only a few communications: this issue is particularly prevalent here as this protocol requires multiple rounds to complete. The independence of each round on the next is a beneficial property, as it means the proof can be performed in parallel, so the prover transmits \textit{all} of their $\psi$'s, then the verifier transmits all of their challenges. However, still is the issue of performing proofs of zero.
We can apply the Fiat-Shamir heuristic \cite{fiatshamir} to make proofs of zero non-interactive. In place of a random oracle, we use a cryptographic hash function. We take the hash of some public parameters to prevent cheating by searching for some values that hash in a preferable manner. In this case, selecting $e = H(g, m, a)$ is a valid choice. To get a hash of desired length, an extendable output function such as SHAKE256 \cite{FIPS202} could be used. The library jsSHA \cite{jssha} provides an implementation of SHAKE256 that works within a browser. We can apply the Fiat-Shamir heuristic to make proofs of zero non-interactive \cite{fiatshamir}. In place of a random oracle, we use a cryptographic hash function. We take the hash of some public parameters to prevent cheating by searching for some values that hash in a preferable manner. In this case, selecting $e = H(g, m, a)$ is a valid choice. To get a hash of desired length, an extendable output function such as SHAKE256 could be used \cite{FIPS202}. The library jsSHA \cite{jssha} provides an implementation of SHAKE256 that works within a browser.
\section{Review} \section{Review}
@ -569,9 +676,7 @@ This is all in an ideal situation without compression or signatures: in the impl
\subsubsection{Time complexity} \subsubsection{Time complexity}
It is remarked that Paillier encryption performs considerably slower than RSA on all key sizes. \cite{paillier1999public} provides a table of theoretic results, suggesting that Paillier encryption can be over 1,000 times slower than RSA for the same key size. Theoretic timing results versus RSA are backed experimentally by my implementation. The following benchmarking code was executed.
Timing results versus RSA are backed experimentally by my implementation. The following benchmarking code was executed.
\begin{minted}{javascript} \begin{minted}{javascript}
console.log("Warming up") console.log("Warming up")
@ -595,27 +700,15 @@ Performing 250 Paillier encrypts required 47,000ms. On the other hand, performin
The speed of decryption is considerably less important in this circumstance, as Paillier ciphertexts are not decrypted during the execution of the program. The speed of decryption is considerably less important in this circumstance, as Paillier ciphertexts are not decrypted during the execution of the program.
\textbf{Public parameter.} The choice of the public parameter $g$ can improve the time complexity by removing the need for some large modular exponentiation. Selection of $g = n + 1$ is good in this regard, as binomial theorem allows the modular exponentiation $g^m \mod n^2$ to be reduced to the computation $1 + nm \mod n^2$. Some potential further optimisations to the implementation are as follows.
\textbf{Caching.} As the main values being encrypted are 0 or 1, a peer could maintain a cache of encryptions of these values and transmit these instantly. Caching may be executed in a background "web worker". A consideration is whether a peer may be able to execute a timing-related attack by first exhausting a peer's cache of a known plaintext value, and then requesting an unknown value and using the time taken to determine if the value was sent from the exhausted cache or not. \textbf{Caching.} As the main values being encrypted are 0 or 1, a peer could maintain a cache of encryptions of these values and transmit these instantly. Caching may be executed in a background "web worker". A consideration is whether a peer may be able to execute a timing-related attack by first exhausting a peer's cache of a known plaintext value, and then requesting an unknown value and using the time taken to determine if the value was sent from the exhausted cache or not.
Taking this idea further, one may simply cache $r^n$ for a number of randomly generated $r$ (as this is the slowest part of encryption). This eliminates the timing attack concern, and grants full flexibility with the values being encrypted.
\textbf{Alternative Paillier scheme.} \cite[Section~2.3.1]{Jurik2003ExtensionsTT} presents an optimised encryption scheme based on the subgroup of elements with Jacobi symbol $+1$. This forms a group as the Jacobi symbol is multiplicative, being a generalisation of the Legendre symbol.
Using this scheme alone reduced the time to encrypt by a half. Greater optimisations are possible through pre-computation of fixed-base exponentials, but the time and memory consumption of this exceeds what my browser was capable of for key sizes of 4096-bit and greater.
In practice, using the scheme alone gave gains close to a reduction by a third, since in the modified scheme additional computation must be performed to attain the $r$ that would work with normal Paillier, in order to perform the zero-knowledge proofs from before.
Using pre-computation on top of this yielded much greater gains, being around four times faster. This is likely due to avoiding the slowdown from general overhead around my JavaScript implementation of modular exponentiation.
Other research such as \cite{10.1145/3485832.3485842} suggests ways to speed up encryption further that could be utilised.
\textbf{Smaller key size.} The complexity of Paillier encryption increases with key size. Using a smaller key could considerably reduce the time taken \cite{paillier1999public}. \textbf{Smaller key size.} The complexity of Paillier encryption increases with key size. Using a smaller key could considerably reduce the time taken \cite{paillier1999public}.
I tested this on top of the alternative Paillier scheme from above. This resulted in linear reductions in encryption time: encryption under a 1024-bit modulus took a sixth of the amount of time as under a 2048-bit modulus, and encryption under a 2048-bit modulus took a sixth of the amount of time as under a 4096-bit modulus. I tested this on top of the alternative Paillier scheme from above. This resulted in linear reductions in encryption time: encryption under a 1024-bit modulus took a sixth of the amount of time as under a 2048-bit modulus, and encryption under a 2048-bit modulus took a sixth of the amount of time as under a 4096-bit modulus.
\textbf{Vectorised plaintexts.} The maximum size of a plaintext is $|n|$: in our case, this is 4096 bits. By considering this as a vector of 128 32-bit values, peers could use a single ciphertext to represent their entire state. \cite{10.1145/2809695.2809723} uses this process to allow embedded devices to make use of the homomorphic properties of Paillier. \textbf{Vectorised plaintexts.} The maximum size of a plaintext is $|n|$: in our case, this is 4096 bits. By considering this as a vector of 128 32-bit values, peers could use a single ciphertext to represent their entire state. This process is discussed as a way to allow embedded devices to use Paillier encryption \cite{10.1145/2809695.2809723}.
\hyperref[protocol1]{Protocol~\ref*{protocol1}} can be modified by instead testing that the given ciphertext is contained in a set of valid ciphertexts. There would still be a large number of Paillier encryptions required during this proof. \hyperref[protocol1]{Protocol~\ref*{protocol1}} can be modified by instead testing that the given ciphertext is contained in a set of valid ciphertexts. There would still be a large number of Paillier encryptions required during this proof.
@ -627,11 +720,13 @@ The other proofs do not translate so trivially to this structure however. In fac
All measurements were taken on Brave 1.50.114 (Chromium 112.0.5615.49) 64-bit, using a Ryzen 5 3600 CPU: a consumer CPU from 2019. Absolute timings are extremely dependent on the browser engine: for example Firefox 111.0.1 was typically 4 times slower than the results shown. All measurements were taken on Brave 1.50.114 (Chromium 112.0.5615.49) 64-bit, using a Ryzen 5 3600 CPU: a consumer CPU from 2019. Absolute timings are extremely dependent on the browser engine: for example Firefox 111.0.1 was typically 4 times slower than the results shown.
\begin{table}[htp] \begin{landscape}
\begin{table}
\fontsize{10pt}{10pt}\selectfont \fontsize{10pt}{10pt}\selectfont
\caption{Time to encrypt} \caption{Time to encrypt}
\label{table1} \label{table1}
\begin{tabularx}{\textwidth}{c *4{>{\Centering}X}} \begin{tabularx}{\hsize}{c *4{>{\Centering}X}}
\toprule \toprule
Modulus size & Na\"ive encrypt & Jacobi encrypt & Jacobi encrypt with pre-computation & RSA encrypt \\ Modulus size & Na\"ive encrypt & Jacobi encrypt & Jacobi encrypt with pre-computation & RSA encrypt \\
\midrule \midrule
@ -642,10 +737,10 @@ All measurements were taken on Brave 1.50.114 (Chromium 112.0.5615.49) 64-bit, u
\end{tabularx} \end{tabularx}
\end{table} \end{table}
\begin{table}[htp] \begin{table}
\fontsize{10pt}{10pt}\selectfont \fontsize{10pt}{10pt}\selectfont
\caption{Time\parnote{$|n| = 4096$ uses a less-optimised encryption method, as the browser frequently timed out attempting to pre-compute for the more-optimised version.} to process proofs} \caption{Time\parnote{$|n| = 4096$ uses a less-optimised encryption method, as the browser frequently timed out attempting to pre-compute for the more-optimised version.} to process proofs}
\begin{tabularx}{\textwidth}{c *6{>{\Centering}X}} \begin{tabularx}{\hsize}{c *6{>{\Centering}X}}
\toprule \toprule
\multirow{2}{*}{Modulus size} & \multicolumn{2}{c}{Proof-of-zero non-interactive} & \multicolumn{2}{c}{\hyperref[protocol1]{Protocol~\ref*{protocol1}} with $t = 24$} & \multicolumn{2}{c}{\hyperref[protocol1]{Protocol~\ref*{protocol1}} with $t = 48$} \tabularnewline \multirow{2}{*}{Modulus size} & \multicolumn{2}{c}{Proof-of-zero non-interactive} & \multicolumn{2}{c}{\hyperref[protocol1]{Protocol~\ref*{protocol1}} with $t = 24$} & \multicolumn{2}{c}{\hyperref[protocol1]{Protocol~\ref*{protocol1}} with $t = 48$} \tabularnewline
\cmidrule(l){2-3}\cmidrule(l){4-5}\cmidrule(l){6-7} \cmidrule(l){2-3}\cmidrule(l){4-5}\cmidrule(l){6-7}
@ -659,11 +754,11 @@ All measurements were taken on Brave 1.50.114 (Chromium 112.0.5615.49) 64-bit, u
\parnotes \parnotes
\end{table} \end{table}
\begin{table}[htp] \begin{table}
\fontsize{10pt}{10pt}\selectfont \fontsize{10pt}{10pt}\selectfont
\caption{Byte size\parnote{1 UTF-16 character, as used by ECMAScript \cite[Section~6.1.4]{ecma2024262}, is 2 or more bytes.} of encoded proofs} \caption{Byte size\parnote{1 UTF-16 character, as used by ECMAScript \cite[Section~6.1.4]{ecma2024262}, is 2 or more bytes.} of encoded proofs}
\label{table3} \label{table3}
\begin{tabularx}{\textwidth}{c *6{>{\Centering}X}} \begin{tabularx}{\hsize}{c *6{>{\Centering}X}}
\toprule \toprule
\multirow{2}{*}{Modulus size} & \multicolumn{2}{c}{Proof-of-zero non-interactive} & \multicolumn{2}{c}{\hyperref[protocol1]{Protocol~\ref*{protocol1}} with $t = 24$} & \multicolumn{2}{c}{\hyperref[protocol1]{Protocol~\ref*{protocol1}} with $t = 48$} \tabularnewline \multirow{2}{*}{Modulus size} & \multicolumn{2}{c}{Proof-of-zero non-interactive} & \multicolumn{2}{c}{\hyperref[protocol1]{Protocol~\ref*{protocol1}} with $t = 24$} & \multicolumn{2}{c}{\hyperref[protocol1]{Protocol~\ref*{protocol1}} with $t = 48$} \tabularnewline
\cmidrule(l){2-3}\cmidrule(l){4-5}\cmidrule(l){6-7} \cmidrule(l){2-3}\cmidrule(l){4-5}\cmidrule(l){6-7}
@ -677,13 +772,15 @@ All measurements were taken on Brave 1.50.114 (Chromium 112.0.5615.49) 64-bit, u
\parnotes \parnotes
\end{table} \end{table}
\end{landscape}
\subsection{Quantum resistance} \subsection{Quantum resistance}
Paillier is broken if factoring large numbers is computationally feasible \cite[Theorem~9]{paillier1999public}. Therefore, it is vulnerable to the same quantum threat as RSA is, which is described by \cite{shor_1997}. Alternative homomorphic encryption schemes are available, which are believed to be quantum-resistant, as they are based on lattice methods (e.g, \cite{fhe}). Paillier is broken if factoring large numbers is computationally feasible \cite[Theorem~9]{paillier1999public}. Therefore, it is vulnerable to the same quantum threat as RSA is, known as Shor's algorithm \cite{shor_1997}. Alternative homomorphic encryption schemes are available, which are believed to be quantum-resistant, as they are based on lattice methods (e.g, \cite{fhe}).
\subsection{Honest-verifier} \subsection{Honest-verifier}
\cite[Section~5.2]{damgard2003} is honest-verifier. However, applying the Fiat-Shamir heuristic converts such a proof into a general zero-knowledge proof \cite[Section~5]{fiatshamir}. This means that, supposing the choice of transform used is appropriate, \hyperref[protocol1]{Protocol~\ref*{protocol1}} should also be general zero-knowledge. However, the interactive proofs performed as part of the game are still only honest-verifier, and a malicious verifier may be able to extract additional information from the prover (such as the blinding value used: this is stated in \cite[Section~5.2]{damgard2003}). The proof of zero is honest-verifier \cite[Section~5.2]{damgard2003}. However, applying the Fiat-Shamir heuristic converts such a proof into a general zero-knowledge proof \cite[Section~5]{fiatshamir}. This means that, supposing the choice of transform used is appropriate, \hyperref[protocol1]{Protocol~\ref*{protocol1}} should also be general zero-knowledge. However, the interactive proofs performed as part of the game are still only honest-verifier, and a malicious verifier may be able to extract additional information from the prover (such as the blinding value used).
\section{Wider application} \section{Wider application}
@ -700,7 +797,7 @@ This is not without its downsides: I found that the complexity of P2P networking
\subsection{Decentralised social media} \subsection{Decentralised social media}
The schemes presented here and in \cite{damgard2003} could be applies to the concept of a decentralised social media platform. Such a platform may use zero-knowledge proofs as a way to allow for "private" profiles: the content of a profile may stay encrypted, but zero-knowledge proofs could be used as a way to allow certain users to view private content in a manner that allows for repudiation, and disallows one user from sharing private content to unauthorised users. The schemes presented here could be applies to the concept of a decentralised social media platform. Such a platform may use zero-knowledge proofs as a way to allow for "private" profiles: the content of a profile may stay encrypted, but zero-knowledge proofs could be used as a way to allow certain users to view private content in a manner that allows for repudiation, and disallows one user from sharing private content to unauthorised users.
The obvious issue is P2P data storage. Users could host their own platforms, but this tends to lead to low adoption due to complexity for normal people. IPFS is a P2P data storage protocol that could be considered. This poses an advantage that users can store their own data, if they have a large amount, but other users can mirror data effectively to protect against outages. The amount of storage can grow effectively as more users join the network. The obvious issue is P2P data storage. Users could host their own platforms, but this tends to lead to low adoption due to complexity for normal people. IPFS is a P2P data storage protocol that could be considered. This poses an advantage that users can store their own data, if they have a large amount, but other users can mirror data effectively to protect against outages. The amount of storage can grow effectively as more users join the network.
@ -708,7 +805,7 @@ The obvious issue is P2P data storage. Users could host their own platforms, but
The ability to prove the contents of a dataset to a second party without guaranteeing authenticity to a third party is another potential application of the protocol presented. Handling of confidential data is a critical concern for pharmaceutical companies, where a data leak imposes serious legal and competitive consequences for the company. A second party does however need some guarantee that the data received is correct. Proofs are one way of achieving this, although other techniques such as keyed hashing may be more effective. The ability to prove the contents of a dataset to a second party without guaranteeing authenticity to a third party is another potential application of the protocol presented. Handling of confidential data is a critical concern for pharmaceutical companies, where a data leak imposes serious legal and competitive consequences for the company. A second party does however need some guarantee that the data received is correct. Proofs are one way of achieving this, although other techniques such as keyed hashing may be more effective.
Another consideration in this domain is the use of homomorphic encryption schemes to allow a third party to process data without actually viewing the data. This protects the data from viewing by the third party, and the processing methods from viewing by the first party. \cite{10.1145/3485832.3485842} states for example that common statistical functions such as regression can be performed on data that is encrypted under the Paillier scheme. Another consideration in this domain is the use of homomorphic encryption schemes to allow a third party to process data without actually viewing the data. This protects the data from viewing by the third party, and the processing methods from viewing by the first party. For example, common statistical functions such as regression can be performed on data that is encrypted under fully homomorphic encryption schemes.
\section{Limitations} \section{Limitations}