#include #include #define wb 30 #define C const #define ug 0 #define stat 0 #define fs 1 #define aQ 0 #define cs 0 static int ugg=0; static int tc=-1; typedef long long B; static int C msk = (1<>wb;} M[4] = cry;} static int lh; static void plh(int h){if(h) lh=1; if(lh) printf("%X", h);} static void pr(int C * C x, int C l, char C * C m){if(1){int j=l; while(j--) printf("%08X ", x[j]); printf("%s\n", m);} if(0) {int j = (l*wb+3)/4; lh=0; printf("(string->number \""/*)*/); --j; plh(x[j*4/wb] >> (j*4%wb)); while(j--){int wx = j*4/wb, sa = j*4%wb; plh(15&((sa+4>wb?(x[wx+1]<<(wb-sa)):0) | (x[wx]>>sa)));} printf(/*(*/"\" 16) = %s\n", m); }} static void ml(int C * C M, int C m, int C * C N, int C n, int * C P){ int p; B cry = 0; for(p=0; p< m+n; ++p){B lo = cry, hi = 0; int i=0, j=p; while(j>=0) {if(i >= 0 && j >= 0 && i < m && j < n) {B pr = (B)M[i]*N[j]; lo += pr&msk; hi += pr >> wb; if(0) if(hi<0) {printf("pr = %llX\n", pr); exit(0);}} ++i; --j;} cry = hi + (lo >> wb); if(lo<0 || hi<0) {printf("hi = %llX, lo = %llx\n", hi, lo); pr(M, m, "M"); pr(N, n, "N"); exit(0);} P[p] = lo&msk;} if(0) printf("cry=%llX, %d\n", cry, p); P[p-1] += cry<> wb;} return cry;} static int r[4]; // Recprocal of modulus static int C * m; // The modulus static int ms; // 3 - ((Words in modulus)+3)%4 static int J; // J+(32-wb) leading zeros in high word of m. static int * Q = 0; // pointer to quotient for testing. static void inv(int C * C a){ int lz=0; int N[6]; // normalized input int Z[5]; // Numerator {int j=5; while(j--) Z[j] = msk;} {int j = 4; while(j--) if(a[j-3] & ~ msk) {printf("Boo: %d\n", j); pr(a-3, 4, "oucg"); exit(0);}} if(!a[0]) {printf("inv requires non-zero leading word\n"); exit(0);} while(msk > (a[0]<<(lz+1))) ++lz; {int k=4; while (k--) N[k] = (a[k-3] << lz)&msk | (a[k-4] >> (wb-lz));} J = lz; {int r0 = // N[3] == 1<<(wb-1)?0x7FFFFFFF:(1LL<<(2*wb))/N[3]; (1LL<<(2*wb))/(N[3]+1); if(0){int ans[5]; ml(&r0, 1, Z, 4, ans); if(ug) pr(ans, 5, "zzg");} mm(r0, Z, Z); mm(r0, N, N); if(ug) {pr(Z, 5, "Z1"); pr(N, 5, "N1");} {int k=4; while(k--){int m[5]; mm(Z[4], N+1, m); r[k] = Z[4]; sub(Z, m, 5); if(ug) pr(Z, 5, "Z2"); N[5] = 0; while(Z[4]) {sub(Z, N+1, 5); if(ug) pr(Z, 5, "Z3"); ++r[k]; if(ug) pr(N+1, 5, "ww");} {int j = 4; while(j--) Z[j+1] = Z[j];} }}} if(ugg) pr(r, 4, "r");} static int m4(int C * C X, int C Y[4], int L, int * acc){ // acc += Y*X; int y0=Y[0], y1=Y[1], y2=Y[2], y3=Y[3]; int x0=X[0], x1=X[1], x2=X[2], x3=X[3]; B p = (B)x0*y0 + acc[0]; acc[0] = p&msk; p = (B)x1*y0 + (B)x0*y1 + acc[1] + (p>>wb); acc[1] = p&msk; p = (B)x2*y0 + (B)x1*y1 + (B)x0*y2+ acc[2] + (p>>wb); acc[2] = p &msk; {int j; for (j= 3; j < L; ++j){ p = (B)x3*y0 + (B)x2*y1 + (B)x1*y2 + (B)x0*y3 + acc[j] + (p>>wb); acc[j] = p&msk; x0=x1; x1=x2; x2=x3; x3=X[j+1];}} p = (B)x2*y1 + (B)x1*y2 + (B)x0*y3 + acc[L] + (p>>wb); acc[L] = p & msk; p = (B)x2*y2 + (B)x1*y3 + acc[L+1] + (p>>wb); acc[L+1] = p & msk; p = (B)x2*y3 + acc[L+2] + (p>>wb); acc[L+2] = p & msk; {int s = acc[L+3] + (p >> wb); acc[L+3] = s&msk; return s>>wb;} } static int ups=0, downs=0, reducs=0; static void red(int L, int * C acc, int C qi){ // Reduce acc by modulus int qe[4], d[4]; if(0) if(acc[3-ms+L] & ~(msk>>J)) {printf("DivideCheck\n"); exit(0);} {int j=4; while(j--) d[j]= (acc[j-ms+L] << J)&msk | acc[j-ms+L-1] >> (wb-J);} d[3] |= J?acc[3-ms+L]<>wb); qe[0] = -(msk&p); p = (B)r[2]*d[3] + (B)r[3]*d[2] + (p>>wb); qe[1] = -(msk&p); p = (B)r[3]*d[3] + (p>>wb); qe[2] = -(msk&p); qe[3] = -(p>>wb);} if(qe[3] > 0) {pr(acc, L, "accRd"); printf("Routine red requires that acc not exceed 2^(4*wb) * mod!\n"); exit(0);} if(ugg) {pr(d, 4, "d"); pr(qe, 4, "qe");} {int w=m4(m, qe, L, acc); if(Q) {int j=4; while(j--) Q[j+qi] = -qe[j];} if(ugg) {int zz[L+4]; int Qe[4]; {int j=4; while(j--) Qe[j] = -qe[j];} ml(m, L, Qe, 4, zz); pr(zz, L+4, "zz"); pr(acc, L+4, "pacc");} if(stat) ++reducs; { // Insure that acc is really less than modulus. while(w) {int c=0; int j; for(j=0; j>wb;} for(j=L; j>wb;} w = !c; if(stat) ++ups; if(ugg) pr(acc, L+4, "upit"); if(Q) --*(Q+qi);} while(acc[L-ms]) {acc[L] += sub(acc, m, L); if(ugg) pr(acc, L+4, "gg"); if(Q) ++*(Q+qi); if(stat) ++downs;} while(1){int j=L-ms; while(j--) { if(acc[j] < m[j]) goto end; if(m[j] < acc[j]) break;} za(sub(acc, m, L-ms), 2); if(Q) ++*(Q+qi); if(stat) ++downs; if(ugg) pr(acc, L+4, "Puff"); } end: ; }} } static void checkM(int C * C X, int C * C Y, int C * C acc, int L) {int c = 0; int j; if(0){putc('.', stdout); fflush(stdout);} if(0) printf("%d\n", tc); if(ugg) pr(Q, L, "Q1"); for(j=0; j> wb; Q[j] &= msk;} else {int C e = Q[j]+c; Q[j] = e&msk; c = e>>wb;} {int fp[2*L], fq[2*L]; ml(X, L, Y, L, fp); ml(Q, L, m, L, fq); if(ugg) pr(Q, L, "quo"); if(0) {pr(fp, 2*L, "fp"); pr(fq, 2*L, "fq");} za(sub(fp, fq, 2*L), 4); {int j=L; while(j--) if(fp[L+j]) {pr(fp, 2*L, "itchp"); pr(fq, 2*L, "itchq"); printf("Jam %d", tc); exit(0);}} {int j=L; while(j--) if(fp[j] != acc[j]) {printf("Jam2\n"); pr(fp, L, "fp2"); ml(X, L, Y, L, fp); pr(fp, L, "fp1"); pr(acc, L+4, "acc"); pr (Q, L, "Q"); pr(X, L, "X"); pr(Y, L, "Y"); exit(0);}}}} static void multMod(int C * C X, int C * C Y, int * C P, int C L){ int acc[L+4]; // We choose Y as the multiplier. if(L&3){printf("L must be a multiple of 4.\n"); exit(0);} {int j=L+4; while(j--) acc[j] = 0;} if(ugg) {pr(X, L, "X"); pr(Y, L, "Y");} if(0) {int zx[16]; ml(X, 12, Y+8, 4, zx); pr(zx, 16, "zx");} {int mp = L; while(mp-=4, mp>=0) { if(m4(X, Y+mp, L, acc)) {sub(acc+4, m, L); if(Q) ++Q[mp+4]; if(ugg) printf("Hic\n");} if(ugg) pr(acc, L+4, "Pacc"); red(L, acc, mp); if(mp) { // Shift left 4 words. int j = L; while(j--) acc[j+4] = acc[j]; j=4; while(j--) acc[j] = 0; if(ugg) pr(acc, L+4, "acc2");} } if(Q) checkM(X, Y, acc, L); {int j = L; while(j--) P[j] = acc[j];}} } static void sqrMod(int C * C X, int * C P, int C L){int acc[2*L]; {int j = 2*L; while(j--) acc[j] = 0;} {int j = L; while(j -= 4) {int c = m4(X, X+j, j, acc+j); {int w=2*j+4; while(c && w<2*L) { int s = c+acc[w]; acc[w++]=s&msk; c = s>>wb;}}}} {int j, c=0; for(j=0; j<2*L; ++j) {int s = (acc[j] << 1) + c ; acc[j] = s&msk; c = s >> wb;}} {int j; B p = 0; for(j=0; j>wb); acc[2*j] = p&msk; p = ((B)X[j+1]*X[j] << 1) + (p >> wb) + acc[2*j+1]; acc[2*j+1] = p&msk; p = ((B)X[j+2]*X[j] << 1) + (B)X[j+1]*X[j+1] + (p >> wb) + acc[2*j+2]; acc[2*j+2] = p&msk; p = (((B)X[j+3]*X[j] + (B)X[j+1]*X[j+2]) << 1) + (p >> wb) + acc[2*j+3]; acc[2*j+3] = p&msk; p = ((B)X[j+3]*X[j+1] << 1) + (B)X[j+2]*X[j+2] + (p >> wb) + acc[2*j+4]; acc[2*j+4] = p&msk; p = ((B)X[j+3]*X[j+2] << 1) + (p >> wb) + acc[2*j+5]; acc[2*j+5] = p&msk; p = (B)X[j+3]*X[j+3] + (p >> wb) + acc[2*j+6]; acc[2*j+6] = p&msk; p = acc[2*j+7] + (p >> wb); acc[2*j+7] = p&msk;} if(p>>wb) {printf("xxx%llx\n", p); exit(0);} } if(ugg){pr(X, L, "X"); pr(acc, 2*L, "X^2");} if(cs){int sq[2*L]; ml(X, L, X, L, sq); {int j=2*L; while(j--) if(sq[j] != acc[j]) {printf("FOO j=%d\n", j); pr(X, L, "root"); pr(acc, 2*L, "r^2"); pr(sq, 2*L, "rr"); exit(0);}}} {int j = L; while(j -= 4, j+4) red(L, acc+j, j);} if(Q) checkM(X, X, acc, L); {int j = L; while(j--) P[j] = acc[j];} if(cs) {{int j = L; while (j--) if(acc[L+j]) {printf("Bamx\n"); exit(0);}} {int j=L; while(j--) {if(acc[j] > m[j]) {printf("Bamy\n"); exit(0);} if(acc[j] < m[j]) goto ex;} {printf("Bamz\n"); exit(0);} ex: ;}} if(ugg) pr(P, L, "moded"); } void modPow(int C * C X, int C * C E, int * C A, int C * C M, int C L){ int P[L]; int vo = 1; if(aQ) Q = malloc(2*4*L); if(L%4) {printf("L must be divisible by 4\n"); exit(0);} {int j=4; while(j--) if(M[L+j-4]){ms = 3-j; break;}} m = M; if(0){printf("L = %d, ms = %d\n", L, ms); pr(M, 12, "m");} if(ugg) pr(M, L, "mod"); inv(M+L-1-ms); if(ugg) printf("J = %d, ms=%d, wb=%d, L=%d\n", J, ms, wb, L); {int j=L; while(j--) {int k = wb; while(k--) {if(!vo) {if (fs) sqrMod(P, P, L); else multMod(P, P, P, L);} if(E[j] & (1 << k)) { if(vo) {int n=L; while(n--) P[n] = X[n]; vo=0;} else multMod(P, X, P, L);} }}} {int j=L; while(j--) A[j] = vo?!j:P[j];} if(Q) free(Q); } #if 1 void set(int * x, int c, int z){while(c--) x[c] = c>=wb;}} else x[0] = X; modPow(x, e, p, m, 12); printf("%d^%d = ", X, E); pr(p, 12, "p");} int main(){ if(0){int k=4; while(k--) {int j=4; while(j--) pt(k-4, j);}} if(1){ #include "xx.h" static int wrk[4*Ss]; if(1) modPow(b, e, wrk, m, 4*Ss); else {int j = 20; while(j--) {modPow(b, e, wrk, m, 4*Ss); printf("j=%d\n", j);}} pr (wrk, 4*Ss, " ans"); {int j = 4*Ss; while(j--) if(wrk[j] != a[j]) {printf("Discrepancy at %d\n", j); exit(0);} printf("Goodness!\n");}} if(0){ #define y 36 #define z 3 // Modulus size is y-z. y must be multiple of 4. int mod[y], a[y], b[y], p[y]; if(1) {int j=y; while(j--) {a[j] = rand(); b[j] = rand(); mod[j] = rand();}} set(mod, y, y-z); mod[y-z-1]= msk-43; do set(a, y, y-z); while(a[y-1-z] >= mod[y-1-z]); do set(b, y, y-z); while(b[y-1-z] >= mod[y-1-z]); pr(a, y, "a"); pr(b, y, "b"); pr(mod, y, "Mod"); m = mod; ms=z; inv(mod+y-z-1); if(ugg) printf("J = %d, ms=%d, wb=%d\n", J, ms, wb); multMod(a, b, p, y); pr(p, y, "ans"); } while(0){int a[y], b[y], M[y], p[y]; if(!--tc) ugg=1; if(!(tc%10)) {if (stat) printf("tc = %d %d ups, %d downs in %d\n", tc, ups, downs, reducs); else printf("tc = %d\n", tc);} ms = rand()&3; {int j; do j = rand()&31; while(j>=wb); {int hm = (1<<(wb-j))-1; M[y-1-ms] = rand()&hm | (1<<(wb-j-1)); do a[y-1-ms] = rand()&hm; while(a[y-1-ms] >= M[y-1-ms]); do b[y-1-ms] = rand()&hm; while(b[y-1-ms] >= M[y-1-ms]);}} {int j; for(j=y-ms; j