jude 2023-04-07 18:59:33 +01:00
parent cc2e1a618e
commit 5557a8bff6
4 changed files with 45 additions and 60 deletions

View File

@@ -13,16 +13,17 @@ class Cyphertext {
     }
     // Compute g^m by binomial theorem.
-    let gm = (1n + key.n * plainText) % key.n ** 2n;
+    let gm = (1n + key.n * plainText) % key.n2;
     // Compute g^m r^n from crt.
-    this.cyphertext = (gm * mod_exp(r, key.n, key.n ** 2n)) % key.n ** 2n;
+    this.cyphertext = (gm * mod_exp(r, key.n, key.n2)) % key.n2;
     // Force into range.
     while (this.cyphertext < 0n) {
-      this.cyphertext += key.n ** 2n;
+      this.cyphertext += key.n2;
     }
+    console.log(performance.now());
     this.r = r;
     this.pubKey = key;
     this.plainText = plainText;
@@ -130,18 +131,14 @@ class ProofSessionVerifier {
     if (gcd(this.a, this.cipherText.pubKey.n) !== 1n) return -3;
     // check exp
-    return mod_exp(
-      proof,
-      this.cipherText.pubKey.n,
-      this.cipherText.pubKey.n ** 2n
-    ) ===
+    return mod_exp(proof, this.cipherText.pubKey.n, this.cipherText.pubKey.n2) ===
       (this.a *
         mod_exp(
           this.cipherText.cyphertext,
           this.challenge,
-          this.cipherText.pubKey.n ** 2n
+          this.cipherText.pubKey.n2
         )) %
-        this.cipherText.pubKey.n ** 2n
+        this.cipherText.pubKey.n2
       ? 1
       : -4;
   }
@@ -152,6 +149,7 @@ window.ReadOnlyCyphertext = ReadOnlyCyphertext;
 export class PaillierPubKey {
   constructor(n) {
     this.n = n;
+    this.n2 = this.n ** 2n;
     this.g = this.n + 1n;
   }
@@ -173,14 +171,14 @@ export class PaillierPubKey {
 class PaillierPrivKey {
   constructor(p, q) {
     this.n = p * q;
+    // precompute square of n
+    this.n2 = this.n ** 2n;
     this.lambda = (p - 1n) * (q - 1n);
     this.mu = mod_exp(this.lambda, this.lambda - 1n, this.n);
   }
   decrypt(c) {
-    return (
-      (((mod_exp(c, this.lambda, this.n ** 2n) - 1n) / this.n) * this.mu) % this.n
-    );
+    return (((mod_exp(c, this.lambda, this.n2) - 1n) / this.n) * this.mu) % this.n;
   }
 }
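For context, the precomputation this commit introduces can be exercised end-to-end with a small self-contained sketch. This is illustrative only: it uses demo-sized primes, a plain square-and-multiply `modExp`, and computes `mu` as a standard modular inverse of lambda (rather than the `mod_exp` expression in the file above), which is the textbook choice when `g = n + 1`.

```javascript
// Sketch of Paillier with n^2 precomputed, mirroring this commit's change.
// Demo-sized primes only; NOT a secure implementation.

function modExp(base, exp, mod) {
  let result = 1n;
  base %= mod;
  while (exp > 0n) {
    if (exp & 1n) result = (result * base) % mod;
    base = (base * base) % mod;
    exp >>= 1n;
  }
  return result;
}

// Extended Euclidean algorithm; assumes gcd(a, m) = 1.
function modInverse(a, m) {
  let [r0, r1] = [a % m, m];
  let [s0, s1] = [1n, 0n];
  while (r1 !== 0n) {
    const q = r0 / r1;
    [r0, r1] = [r1, r0 - q * r1];
    [s0, s1] = [s1, s0 - q * s1];
  }
  return ((s0 % m) + m) % m;
}

class PubKey {
  constructor(n) {
    this.n = n;
    this.n2 = n ** 2n; // squared once, reused on every encryption
  }
  // g = n + 1, so g^m = 1 + n*m (mod n^2) by binomial theorem.
  encrypt(m, r) {
    const gm = (1n + this.n * m) % this.n2;
    return (gm * modExp(r, this.n, this.n2)) % this.n2;
  }
}

class PrivKey {
  constructor(p, q) {
    this.n = p * q;
    this.n2 = this.n ** 2n;
    this.lambda = (p - 1n) * (q - 1n);
    this.mu = modInverse(this.lambda, this.n); // lambda^-1 mod n
  }
  decrypt(c) {
    // L(x) = (x - 1) / n, then multiply by mu mod n.
    return (((modExp(c, this.lambda, this.n2) - 1n) / this.n) * this.mu) % this.n;
  }
}

const pub = new PubKey(5n * 7n);
const priv = new PrivKey(5n, 7n);
console.log(priv.decrypt(pub.encrypt(12n, 2n))); // 12n
```

The round trip works with lambda = (p-1)(q-1) because it is a multiple of the Carmichael function of n, which is all the decryption identity requires.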

View File

@@ -318,4 +318,6 @@ doi={10.1109/SP.2014.36}}
   author={Fabrice Boudot},
   booktitle={International Conference on the Theory and Application of Cryptographic Techniques},
   year={2000}
 }
+@misc{projectgemini, url={gemini://gemini.circumlunar.space/docs/specification.gmi}, journal={Project gemini}}

Binary file not shown.

View File

@@ -114,7 +114,7 @@ If both parties were to collude and generate non-randomly, this protocol falls t
 \subsection{Zero-knowledge proofs}
-Zero-knowledge proofs form a subset of minimum disclosure proofs, and beyond that, a subset of interactive proofs. Zero-knowledge proofs are defined by three axioms: \begin{itemize} %todo ref
+Zero-knowledge proofs form a subset of minimum disclosure proofs, and beyond that, a subset of interactive proofs. Zero-knowledge proofs are typically defined by three properties: \begin{itemize} %todo ref
     \item \textbf{Completeness.} If the conjecture is true, an honest verifier will be convinced of its truth by a prover.
     \item \textbf{Soundness.} If the conjecture is false, a cheating prover cannot convince an honest verifier (except with some small probability).
     \item \textbf{Zero-knowledge.} This is the condition for a minimum disclosure proof to be considered zero-knowledge. If the conjecture is true, the verifier cannot learn any other information besides the truthfulness.
@@ -221,36 +221,6 @@ The Fiat-Shamir heuristic \cite{fiatshamir} provides another method to reduce co
 Another approach to the problem is to use set membership, which is a widely considered problem in zero-proof literature. In this case, each region would be associated with a set of units from a public "pool" of units. Then, a player needs to prove the cardinality of a set, and the uniqueness/distinctness of its members. A number of constructs exist for analysing and proving in obscured sets.
-\subsubsection{Accumulators}
-Defined by \cite{10.1007/3-540-48285-7_24}, accumulators form a subset of one-way hash functions that satisfy a \textit{quasi-commutative} property: that is, for some hash function $h$, $h(h(x_1, y_1), y_2) = h(h(x_1, y_2), y_1)$.
-\cite{10.1007/3-540-48285-7_24} also proved that such functions exist, by providing an example based on modular arithmetic. They then used these to construct set membership proofs as follows: \begin{itemize}
-    \item Take $s_1, \dots, s_n$ a set of users who wish to identify each other, and $P_k$ a public key.
-    \item Each user $s_i$ computes $z = h( h( h(P_k, s_1), \dots), s_n)$ and $z_i = h( h( h(P_k, s_1), \dots), s_n)$ omitting $s_i$.
-    \item For a user to validate their membership to another user, they publish $(z_i, s_i)$.
-\end{itemize}
-\subsubsection{Merkle trees}
-Merkle trees \cite{merkle} provide an alternative way of proving set membership, that is more space efficient than accumulators, and doesn't require special hashing functions (any one-way function will work). A Merkle tree stores the hashes of some data in the leaf nodes, and each node above stores the hash of the two nodes below it. The commitment is then the hash of the topmost node.
-With this scheme, the data stored in the leaf nodes is totally obscured. However, the constructor of the tree can demonstrate to another user the presence of some data in the tree by revealing the hashes of a subset of the other nodes in the tree. They can also reveal the tree's structure without revealing any contents by revealing all hashes constituting the tree.
-Whilst this would be useful in a Risk version in which a player never exposed their unit count, and simply wagered units on an attack; it doesn't apply well to the intended scenario of privately communicating unit counts, as the hash function used is well-known, and so proofs to a single player can easily be replicated by a malicious verifier to other players in the game.
-To overcome this issue we want to devise some zero-knowledge system for proving set size. It is then beneficial to consider a public set $U$ containing all of a player's possible units.
-\subsubsection{Blind signatures}
-\cite{blindsig} describes a process of a blind signature, in which a message is signed without the contents being revealed to the signer. This requires some signing function $S$ which commutes with an encrypting function $E$, i.e $E^{-1}(S^{-1}(E(m))) = S^{-1}(m)$.
-\cite{10.1007/978-3-540-89255-7_15} demonstrates how blind signatures can be used to construct zero-knowledge set membership proofs for some element $\sigma$ in a public set $\Phi$, using pairing-based cryptography.
-Blind signatures can also be performed with RSA \cite{bellare2003one}. In RSA-based blind signatures, the signing party computes primes $p_A, q_A$ and exponents $d, e$ such that $(m^d)^e \equiv m \mod p_Aq_A$. The 2-tuple $(p_Aq_A, e)$ is the public key, and is released publicly. The other party computes a random value $R$, and computes and publishes $B = m \cdot R^e \mod p_Aq_A$ for some message $m$. The signing party then replies with $B^d = (m \cdot R^e)^d \equiv m^d \cdot R \mod p_Aq_A$, so that the other party can then extract $m^d$ as $R$ is known only to them. Due to the discrete logarithm problem, determining the signing key $d$ from this is not computationally feasible. Similarly, it is not feasible for the signer to determine $m$, as $R$ is not known to them.
-RSA blinding can incur a security risk, as by using the same keys to sign and encrypt, a player can be tricked into revealing their private key through a chosen-plaintext attack.
 \section{Implementation}
 The implementation provided uses WebSockets as the communication primitive. This is therefore a centralised implementation. However, no verification occurs in the server code, which instead simply "echoes" messages received to all connected clients.
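The RSA blinding arithmetic described in the blind-signatures passage above can be checked numerically with a small sketch. The parameters here are textbook-sized and purely illustrative (they come from the classic RSA worked example, not from this project); the point is only to confirm the identity $B^d = m^d \cdot R \bmod n$.

```javascript
// Numeric check of RSA blind signatures: B = m * R^e, B^d = m^d * R (mod n).
// Toy parameters; NOT secure.

function modExp(base, exp, mod) {
  let result = 1n;
  base %= mod;
  while (exp > 0n) {
    if (exp & 1n) result = (result * base) % mod;
    base = (base * base) % mod;
    exp >>= 1n;
  }
  return result;
}

// Extended Euclidean algorithm; assumes gcd(a, m) = 1.
function modInverse(a, m) {
  let [r0, r1] = [a % m, m];
  let [s0, s1] = [1n, 0n];
  while (r1 !== 0n) {
    const q = r0 / r1;
    [r0, r1] = [r1, r0 - q * r1];
    [s0, s1] = [s1, s0 - q * s1];
  }
  return ((s0 % m) + m) % m;
}

// Signer's key: n = p*q, with e*d ≡ 1 mod (p-1)(q-1).
const n = 61n * 53n; // 3233
const e = 17n;
const d = 2753n;

const m = 42n; // message; never seen in the clear by the signer
const R = 99n; // requester's random blinding factor, gcd(R, n) = 1

const blinded = (m * modExp(R, e, n)) % n;          // B, sent to the signer
const signedBlinded = modExp(blinded, d, n);        // signer returns B^d = m^d * R
const sig = (signedBlinded * modInverse(R, n)) % n; // unblind to recover m^d

console.log(sig === modExp(m, d, n)); // true: matches a direct signature of m
console.log(modExp(sig, e, n) === m); // true: verifies under the public key
```

Only the requester knows `R`, so only they can strip the blinding; the signer learns nothing about `m`.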
@@ -267,20 +237,19 @@ In particular, the final point allows for the use of purely JSON messages, which
 Messages are given a fixed structure to make processing simpler. Each JSON message holds an \texttt{author} field, being the sender's ID; a message ID to prevent replay attacks and associate related messages; and an \texttt{action}, which at a high level dictates how each client should process the message.
-The action more specifically is one of \texttt{ANNOUNCE}, \texttt{DISCONNECT}, \texttt{KEEPALIVE}, \texttt{RANDOM}, and \texttt{ACT}. The first three of these are used for managing the network by ensuring peers are aware of each other and know the state of the network. \texttt{RANDOM} is designated to be used by the shared-random-value subprotocol defined later. \texttt{ACT} is used by players to submit actions for their turn during gameplay.
+The action more specifically is one of \texttt{ANNOUNCE}, \texttt{DISCONNECT}, \texttt{KEEPALIVE}, \texttt{RANDOM}, \texttt{PROOF}, and \texttt{ACT}. The first three of these are used for managing the network by ensuring peers are aware of each other and know the state of the network. \texttt{RANDOM} and \texttt{PROOF} are designated to be used by sub-protocols defined later on. \texttt{ACT} is used by players to submit actions for their turn during gameplay.
 Each message is also signed to verify the author. This is a standard application of RSA. A hash of the message is taken, then encrypted with the private key. This can be verified with the public key.
-RSA keys are accepted by peers on a first-seen basis.
+Players trust RSA keys on a trust-on-first-use (TOFU) basis. TOFU is the same protocol as used by Gemini \cite{projectgemini}. The main issue with TOFU is that if a malicious party intercepts the first communication, they may substitute the RSA credentials transmitted by the intended party, resulting in a man-in-the-middle attack.
-\subsection{Paillier}
+\subsection{Paillier cryptosystem}
 Paillier requires the calculation of two large primes for the generation of public and private key pairs. ECMAScript typically stores integers as floating point numbers, giving precision up to $2^{53}$. This is clearly inappropriate for the generation of sufficiently large primes.
-In 2020,
-ECMAScript introduced \texttt{BigInt} \cite{tc39}, which are, as described in the spec, "arbitrary precision integers". Whilst this does not hold true in common ECMAScript implementations (such as Chrome's V8), these "big integers" still provide sufficient precision for the Paillier cryptosystem, given some optimisations and specialisations are made with regards to the Paillier algorithm and in particular the modular exponentiation operation.
+In 2020, ECMAScript introduced \texttt{BigInt} \cite{tc39}, which are, as described in the spec, "arbitrary precision integers". Whilst this does not hold true in common ECMAScript implementations (such as Chrome's V8), these "big integers" still provide sufficient precision for the Paillier cryptosystem, given some optimisations and specialisations are made with regards to the Paillier algorithm and in particular the modular exponentiation operation.
-It must be noted that \texttt{BigInt} is inappropriate for cryptography in practice, due to the possibility of timing attacks as operations are not necessarily constant time \cite{tc39}. In particular, modular exponentiation is non-constant time, and operates frequently on secret data. A savvy attacker may be able to use this to leak information about an adversary's private key.
+It must be noted that \texttt{BigInt} is inappropriate for cryptography in practice, due to the possibility of timing attacks as operations are not necessarily constant time \cite{tc39}. In particular, modular exponentiation is non-constant time, and operates frequently on secret data. A savvy attacker may be able to use this to leak information about an adversary's private key; however, as decryption is not performed, this risk is considerably reduced as there is less need to perform optimisations based on Chinese remainder theorem which would require treating the modulus $n$ as its two components $p$ and $q$.
 \subsection{Modular exponentiation}
@@ -405,7 +374,10 @@ Players should prove a number of properties of their game state to each other to
 \subsection{Range proof}
-\cite{Boudot2000EfficientPT}'s proof is a multi-round proof more similar to %todo
+\cite{Boudot2000EfficientPT}'s proof is a multi-round proof more similar in structure to the graph isomorphism proof presented in \cite{10.1145/116825.116852}. We select public parameter $\ell$ to be some sufficiently high value that a player's unit count should not exceed during play: an appropriate choice may be 1000. Select $n$ as the number of units that the player is defending with, or in the case of attacking, let $n$ be the number of units that the player is attacking with plus 1 (as is required by the rules of Risk).
+To reduce the number of times the proof must be conducted, we use the Fiat-Shamir heuristic, with the shared random values scheme %todo move this
+acting as the random oracle.
 \subsection{Cheating with negative values}
@@ -543,7 +515,7 @@ On the other hand, \hyperref[protocol1]{Protocol~\ref*{protocol1}} requires mult
 This could be overcome by reducing the number of rounds, which comes at the cost of increasing the probability of cheating. In a protocol designed to only facilitate a single game session, this may be acceptable to the parties involved. For example, reducing the number of rounds to 19 will increase the chance of cheating to $\left(\frac{1}{2}\right)^{-19} \approx 1.9 \times 10^{-6}$, but the size would reduce considerably to $\sim$770kB.
-This is all in an ideal situation without compression: in the implementation presented, the serialisation of a ciphertext is larger than this, since it serialises to a string of the hexadecimal representation. Compression shouldn't be expected to make a considerable difference, as the ciphertexts should appear approximately random.
+This is all in an ideal situation without compression or signatures: in the implementation presented, the serialisation of a ciphertext is larger than this, since it serialises to a string of the hexadecimal representation and includes a digital signature for authenticity. Compression shouldn't be expected to make a considerable difference, as the ciphertexts should appear approximately random.
 The size of the proof of zero communication is, in total, $3290 + 1744 + 2243$ characters, i.e $\sim$7.3kB. This is about 2-3 times larger than the ideal size. A solution to this is to use a more compact format, for example msgpack \cite{msgpack} (which also has native support for binary literals).
@@ -551,9 +523,7 @@ The size of the proof of zero communication is, in total, $3290 + 1744 + 2243$ c
 It is remarked that Paillier encryption performs considerably slower than RSA on all key sizes. \cite{paillier1999public} provides a table of theoretic results, suggesting that Paillier encryption can be over 1,000 times slower than RSA for the same key size.
-\cite{paillier1999public} also remarks that the choice of the public parameter $g$ can improve the time complexity. The selection of $g = n + 1$ is optimal in this regard, as binomial theorem allows the modular exponentiation $g^m \mod n^2$ to be reduced to the computation $1 + gm \mod n^2$.
-These results are backed experimentally by my implementation. The following benchmarking code was executed.
+Timing results versus RSA are backed experimentally by my implementation. The following benchmarking code was executed.
 \begin{minted}{javascript}
 console.log("Warming up")
@@ -573,14 +543,29 @@ These results are backed experimentally by my implementation. The following benc
 console.log(performance.measure("duration", "start", "end").duration)
 \end{minted}
-Performing 250 Paillier encrypts required 49,100ms. On the other hand, performing 250 RSA encrypts required 60ms. This is a difference of over 1,000 times.
+Performing 250 Paillier encrypts required 48,800ms. On the other hand, performing 250 RSA encrypts required just 60ms.
+Decryption is remarked as being optimisable to constant time through application of Chinese Remainder Theorem. The speed of decryption is considerably less important in this circumstance, as Paillier ciphertexts are not decrypted during the execution of the program.
+There is little room for optimisation of the mathematics in Paillier encryption. Some possibilities are discussed below.
+\textbf{Public parameter.} The choice of the public parameter $g$ can improve the time complexity by removing the need for some large modular exponentiation. Selection of $g = n + 1$ is good in this regard, as binomial theorem allows the modular exponentiation $g^m \mod n^2$ to be reduced to the computation $1 + nm \mod n^2$.
+\textbf{Smaller key size.} The complexity of Paillier encryption increases with key size. Using a smaller key could considerably reduce the time taken \cite{paillier1999public}.
+\textbf{Pre-computation.} As the main values being encrypted are 0 or 1, a peer could pre-compute the encryptions of these values and transmit these instantly. Pre-computation may be executed in a background "web worker". A consideration is whether a peer may be able to execute a timing-related attack by first exhausting a peer's pre-computed cache of a known value, and then requesting an unknown value and using the time taken to determine if the value was sent from the exhausted cache or not.
+Taking this idea further, one may simply pre-compute $r^n$ for a number of randomly generated $r$ (as this is the slowest part of encryption). This eliminates the timing attack concern, and grants full flexibility with the values being encrypted.
+\textbf{Restructuring plaintexts.} The maximum size of a plaintext is $|n|$: in our case, this is 4096 bits. By considering this as a vector of 128 32-bit values, peers could use a single ciphertext to represent their entire state. \hyperref[protocol1]{Protocol~\ref*{protocol1}} can be modified by instead testing that the given ciphertext is contained in a set of valid ciphertexts. There would still be a large number of Paillier encryptions required during this proof.
+The other proofs do not translate so trivially to this structure however. In fact, in some contexts the proofs required may be considerably more complicated, becoming round-based proofs which may be slower and use more Paillier encryptions to achieve the same effect.
+\textbf{Optimising language.} An optimising language may be able to reduce the time taken to encrypt. On the browser, this could involve using WASM as a way to execute compiled code within the browser, although WASM does not always outperform JavaScript.
 \subsection{Quantum resistance}
-Some of the security of Paillier relies upon the difficulty of factoring large numbers. Therefore, it is vulnerable to the same quantum threat as RSA is, which is described by \cite{shor_1997}. Alternative homomorphic encryption schemes are available, which are widely believed to be quantum-resistant, as they are based on lattice methods (e.g, \cite{fhe}).
+The security of Paillier relies upon the difficulty of factoring large numbers \cite{paillier1999public}. Therefore, it is vulnerable to the same quantum threat as RSA is, which is described by \cite{shor_1997}. Alternative homomorphic encryption schemes are available, which are widely believed to be quantum-resistant, as they are based on lattice methods (e.g, \cite{fhe}).
 \subsection{Side-channels}
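The $r^n$ pre-computation idea described in the hunk above moves the only expensive step of encryption off the hot path. A sketch, with an assumed pool API (the class and its names are illustrative, not from the project's codebase) and the same demo modulus used in the implementation's tests of small parameters:

```javascript
// Pre-compute r^n mod n^2 for a batch of random r ahead of time, so that
// the encryption path is left with only cheap multiplications.
// Demo-sized modulus; NOT a secure implementation.

function modExp(base, exp, mod) {
  let result = 1n;
  base %= mod;
  while (exp > 0n) {
    if (exp & 1n) result = (result * base) % mod;
    base = (base * base) % mod;
    exp >>= 1n;
  }
  return result;
}

class PrecomputingEncryptor {
  constructor(n, rs) {
    this.n = n;
    this.n2 = n ** 2n;
    // The slow part, done early (e.g. in a background web worker).
    this.pool = rs.map((r) => modExp(r, n, this.n2));
  }
  encrypt(m) {
    const rn = this.pool.pop(); // consume one precomputed r^n
    if (rn === undefined) throw new Error("pre-computed pool exhausted");
    // g = n + 1: g^m = 1 + n*m (mod n^2), so no exponentiation remains here.
    return (((1n + this.n * m) % this.n2) * rn) % this.n2;
  }
}

const enc = new PrecomputingEncryptor(35n, [2n]);
console.log(enc.encrypt(12n)); // 228n, same ciphertext as encrypting with r = 2
```

Because the pool entries are independent of the plaintext, drawing from the pool leaks nothing about which value was encrypted, which is what removes the cache-exhaustion timing concern raised in the text.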
@@ -613,7 +598,7 @@ Another consideration in this domain is the use of fully-homomorphic encryption
 \section{Limitations}
-Finally, I present the limitations that I encountered.
+Finally, I present a summary of other limitations that I encountered.
 \subsection{JavaScript}