/*	The FGLM Algorithm from axcas.net
	by Roman Pearce, October 2025

	This code is released into the public domain.

	This software and documentation is provided "as is", without warranty of any kind,
	express or implied, including but not limited to the warranties of merchantability,
	fitness for a particular purpose, and noninfringement.  In no event shall the authors
	or copyright holders be liable for any claim, damages, or other liability, whether in
	an action of contract, tort, or otherwise, arising from, out of or in connection with
	the software or the use or other dealings in the software.
*/

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
#include <time.h>

static int info = 5;


/*	Machine integer routines

	We assume the following non-portable things:
	- two's complement integer arithmetic
	- a signed right shift duplicates the sign bit
	- a flat memory model that is byte addressable
	- malloc aligns memory to the word size
*/

/* basic integer aliases; INT32/UINT32 are assumed to be 32 bits wide */
/* and INT64/UINT64 to be 64 bits wide (this is not checked here) */
#define INT32 int
#define INT64 long long int
#define UINT32 unsigned int
#define UINT64 unsigned long long int
#define CHAR   unsigned char

/* determine word size */
/* INT and UINT are the signed and unsigned machine word, selected */
/* from the width of a pointer as reported by UINTPTR_MAX */
#if UINTPTR_MAX==0xFFFFFFFF
	#define WORDSIZE 32
	typedef INT32	INT;
	typedef UINT32	UINT;
#elif UINTPTR_MAX==0xFFFFFFFFFFFFFFFF
	#define WORDSIZE 64
	typedef INT64	INT;
	typedef UINT64	UINT;
#else
	#error port WORDSIZE
#endif
/* casts to the signed and unsigned word types */
#define I(x) ((INT)(x))
#define U(x) ((UINT)(x))


/* platform specific assembly support */
/* each branch defines one tag macro (MSCx64, GNUx64 or GNUa64) that the */
/* double word routines below test to pick intrinsics or inline assembly; */
/* when no tag is defined those routines use their portable C branches */
#if defined(_MSC_VER) && defined(_M_X64)
	#define MSCx64
	#include <intrin.h>
	#pragma intrinsic(_umul128)
	#pragma intrinsic(_addcarry_u64)
	#pragma intrinsic(_subborrow_u64)
#elif defined(__GNUC__) && defined(__x86_64__)
	#define GNUx64
#elif defined(__GNUC__) && defined(__arm64__)
	#define GNUa64
#elif defined(__GNUC__) && defined(__aarch64__)
	#define GNUa64
#endif


/* floating point sign: -1 if x < 0, 1 if x > 0, otherwise 0 */
/* note that the argument x is evaluated more than once */
#define fsign(x) (((x) < 0) ? -1 : ((x) > 0))

/* print the WORDSIZE bits of x to stdout, most significant */
/* bit first, followed by a newline (debugging aid) */
void uprint(UINT x)
{
	int bit = WORDSIZE;
	while (bit-- > 0) {
		printf("%s", ((x >> bit) & 1) ? "1" : "0");
	}
	printf("\n");
}


/* mix the word x into the running hash h: xor them together */
/* and multiply by a fixed odd constant (wraps modulo 2^WORDSIZE) */
UINT uhash(UINT h, UINT x)
{
	UINT mixed = h ^ x;
#if WORDSIZE==32
	return mixed*270566475UL;
#else
	return mixed*36064050381096011ULL;
#endif
}


/* xorshift from Marsaglia */
UINT urandom()
{
	UINT64 ss;
	static UINT32 rs = 2463534242UL;
	rs ^= (rs << 13);
	rs ^= (rs >> 17);
	rs ^= (rs << 5);
	if (WORDSIZE==32) return rs;
	ss = rs;
	rs ^= (rs << 13);
	rs ^= (rs >> 17);
	rs ^= (rs << 5);
	ss = (ss << 32) | rs;
	return (UINT)ss;
}


/* branch-free absolute value of a signed word, returned unsigned */
/* relies on the arithmetic right shift assumed at the top of the file: */
/* mask is all ones when x is negative and zero otherwise */
UINT uabs(INT x)
{
	UINT mask = x >> (WORDSIZE-1);
	UINT value = x;
	return (value + mask) ^ mask;
}
#define ABS(x) uabs(x)


/* larger of two unsigned words */
UINT umax(UINT a, UINT b)
{
	if (a > b) return a;
	return b;
}


/* smaller of two unsigned words */
UINT umin(UINT a, UINT b)
{
	if (a > b) return b;
	return a;
}


/* smallest power of two that is >= x */
/* x = 0 and values above the largest power of two wrap around to 0 */
UINT up2(UINT x)
{
	UINT shift;
	x--;
	/* smear the highest set bit into every lower position */
	for (shift=1; shift < WORDSIZE; shift <<= 1) {
		x |= x >> shift;
	}
	return x+1;
}


/* count leading zero bits of x, returns WORDSIZE when x is 0 */
UINT ulz(UINT x)
{
	UINT count, step;
	if (x == 0) return WORDSIZE;
	count = 0;
	/* binary search: if the top 'step' bits are clear, skip over them */
	for (step = WORDSIZE/2; step; step >>= 1) {
		if ((x >> (WORDSIZE - step)) == 0) {
			count += step;
			x <<= step;
		}
	}
	return count;
}


/* count trailing zero bits of x */
/* returns WORDSIZE when x is 0, matching ulz; without this */
/* check the loop below would never terminate for x = 0 */
UINT utz(UINT x)
{
	UINT n = 0;
	if (!x) return WORDSIZE;
	while (!(x & 1)) {
		x = x >> 1;
		n++;
	}
	return n;
}


/* number of set bits in n */
UINT upop(UINT n)
{
	UINT count = 0;
	while (n) {
		n &= n-1;	/* clear the lowest set bit */
		count++;
	}
	return count;
}


/* floor of log2(n), meaningful only for n > 0 */
/* computed as the index of the highest set bit */
UINT ulog2(UINT n)
{
	UINT zeros = ulz(n);
	return WORDSIZE-1-zeros;
}


/* greatest common divisor by a modified Euclidean algorithm */
/* each step replaces (a,b) with (b - a%b, a%b) after ordering a >= b */
/* if either argument is zero the other one is returned */
UINT ugcd(UINT a, UINT b)
{
	UINT rem, tmp;
	while (a != 0 && b != 0) {
		if (a < b) {
			tmp = a; a = b; b = tmp;
		}
		rem = a % b;
		a = b - rem;
		b = rem;
	}
	/* at most one of them is non-zero here */
	return (a | b);
}


/* a^n by binary powering, the result wraps modulo 2^WORDSIZE */
UINT upow(UINT a, UINT n)
{
	UINT acc = 1, base = a;
	/* scan the exponent from its least significant bit upwards */
	for (;;) {
		if (n & 1) acc = acc * base;
		n = n >> 1;
		if (!n) break;
		base = base * base;
	}
	return acc;
}

/* 10^n with a lazily filled lookup table */
/* the table covers every n for which 10^n fits in a word; for larger n */
/* the (wrapped) value of upow(10,n) is returned without touching the */
/* table, instead of indexing past the end of it */
UINT upow10(UINT n)
{
#if WORDSIZE==32
	static UINT table[10] = {0};
#elif WORDSIZE==64
	static UINT table[20] = {0};
#else
	#error port upow10
#endif
	if (n >= sizeof(table)/sizeof(table[0])) return upow(10,n);
	/* entries are never zero once computed, so 0 means "not cached" */
	if (!table[n]) table[n] = upow(10,n);
	return table[n];
}

/* floor of the logarithm of a in base b, returns 0 when b <= 1 */
/* multiplies up powers of b until they exceed a or overflow */
UINT ulog(UINT a, UINT b)
{
	UINT cur, next, count;
	if (b <= 1) return 0;
	cur = 1;
	count = 0;
	while (count < WORDSIZE) {
		next = cur*b;
		if (next > a) break;
		/* the product wrapped around */
		if (next/b != cur) break;
		cur = next;
		count++;
	}
	return count;
}


/* floor of a^(1/n) by binary search */
/* returns 0 for n = 0, a for n = 1 or a <= 1, and 1 when n >= WORDSIZE */
UINT uroot(UINT a, UINT n)
{
	UINT lo, hi, mid, x, y, e, w;
	w = WORDSIZE;
	if (n == 0) return 0;
	if (n == 1) return a;
	if (a <= 1) return a;
	if (n >= w) return 1;
	/* the root is below 2^ceil(w/n); rounding w/n down instead would */
	/* cut off valid roots whenever n does not divide w (for example */
	/* cube roots of values above 2^63 on a 64-bit machine) */
	lo = 1;
	hi = U(1) << ((w + n - 1)/n);
	while (lo <= hi) {
		mid = lo + (hi - lo)/2;
		/* x = mid to the nth power */
		for (x=mid, e=1; e < n; e++) {
			y = x*mid;
			/* overflow check */
			if (y/mid != x) break;
			x = y;
		}
		/* mid^n overflowed or is too big, else too small, else exact */
		if (e < n || x > a) hi = mid-1;
		else if (x < a) lo = mid+1;
		else return mid;
	}
	return hi;
}


/* inverse of a modulo b by the extended Euclidean algorithm (Knuth) */
/* returns 0 when the final gcd exceeds 1, i.e. a is not invertible */
UINT uinvmod(UINT a, UINT b)
{
	UINT modulus, quot, rem, c0, c1, saved, steps;
	modulus = b;
	c0 = 0;
	c1 = 1;
	steps = 0;
	while (b) {
		quot = a / b;
		rem = a % b;
		a = b;
		b = rem;
		/* update the pair of cofactors (wrapping arithmetic) */
		saved = c0;
		c0 = c1 - quot*c0;
		c1 = saved;
		steps++;
	}
	/* not invertible */
	if (a > 1) return 0;
	/* after an odd number of steps the cofactor is negative */
	if (steps & 1) c1 += modulus;
	return c1;
}


/* double word addition (a0:a1) += (b0:b1) with a0 the low word */
/* returns the carry out of the high word (0 or 1) */
UINT uadd2(UINT *a0, UINT *a1, UINT b0, UINT b1)
{
#if defined(MSCx64)
	unsigned char c;
	c = _addcarry_u64(0,*a0,b0,a0);
	c = _addcarry_u64(c,*a1,b1,a1);
	return c;
#else
	UINT low, high, carry_low, carry_high, carry_prop;
	/* low words: a wrapped sum is smaller than either operand */
	low = *a0 + b0;
	carry_low = low < b0;
	*a0 = low;
	/* high words, then propagate the carry from the low words */
	high = *a1 + b1;
	carry_high = high < b1;
	high = high + carry_low;
	carry_prop = high < carry_low;
	*a1 = high;
	return carry_high | carry_prop;
#endif
}


/* double word subtraction (a0:a1) -= (b0:b1) with a0 the low word */
/* returns the borrow out of the high word (0 or 1) */
UINT usub2(UINT *a0, UINT *a1, UINT b0, UINT b1)
{
#if defined(MSCx64)
	unsigned char c;
	c = _subborrow_u64(0,*a0,b0,a0);
	c = _subborrow_u64(c,*a1,b1,a1);
	return c;
#else
	UINT low, high, final, borrow_low, borrow_high, borrow_prop;
	/* a wrapped difference is larger than the minuend */
	low = *a0 - b0;
	borrow_low = *a0 < low;
	high = *a1 - b1;
	borrow_high = *a1 < high;
	/* propagate the borrow from the low words */
	final = high - borrow_low;
	borrow_prop = high < final;
	*a0 = low;
	*a1 = final;
	return borrow_high | borrow_prop;
#endif
}


/* (lo:hi) = a*b */
/* full double word product of two words: the low word is returned */
/* and the high word is stored through hi, which must not be null */
UINT umul2(UINT a, UINT b, UINT *hi)
{
#if WORDSIZE==32
	/* 32-bit words: a 64-bit product holds both halves */
	UINT64 t0 = (UINT64)(a)*(b);
	*hi = (t0 >> 32);
	return t0;

#elif defined(MSCx64)
	/* compiler intrinsic */
	return _umul128(a,b,hi);

#elif defined(GNUx64)
	/* the constraints tie a to the "a" register and b to the "d" register */
	/* on input and output; afterwards a is the low word and b the high word */
	__asm__("mulq %1" : "=a"(a), "=d"(b) : "0"(a), "1"(b) : "cc");
	*hi = b; return a;

#elif defined(GNUa64)
	/* umulh supplies the high word, an ordinary multiply the low word */
	UINT c;
	__asm__("umulh %0, %1, %2" : "=r"(c) : "r"(a), "r"(b) : "cc");
	*hi = c; return a*b;

#elif WORDSIZE==64
	/* portable version: schoolbook multiplication on 32-bit halves */
	UINT64 a0, b0, a1, b1, r0, r1, r2, t0;
	a0 = a & 0xFFFFFFFF;
	b0 = b & 0xFFFFFFFF;
	a1 = (a >> 32);
	b1 = (b >> 32);
	t0 = a0 * b0;
	r0 = t0 & 0xFFFFFFFF;
	t0 = a1 * b0 + (t0 >> 32);
	r1 = t0 & 0xFFFFFFFF;
	r2 = (t0 >> 32);
	t0 = a0 * b1 + r1;
	/* assign upper 64-bits to hi */
	*hi = a1 * b1 + (t0 >> 32) + r2;
	return (t0 << 32) + r0;
#else
	#error port umul2
#endif
}


/* (lo:hi)/v = q,r from Hacker's Delight */
/* divides the double word (lo:hi) by v and returns the quotient; the */
/* remainder is stored through r unless r is null.  The quotient must */
/* fit in one word, that is hi < v.  In the portable 64-bit branch a */
/* violation prints a message and terminates the program with abort() */
/* (the previous code forced a crash by writing through a null pointer, */
/* which is undefined behaviour and may be removed by the compiler). */
UINT udiv2(UINT lo, UINT hi, UINT v, UINT *r)
{
#if WORDSIZE==32
	UINT64 t0 = hi;
	t0 = (t0 << 32) + lo;
	if (r) *r = t0 % v;
	return (UINT)(t0 / v);

#elif defined(GNUx64)
	/* divq takes the dividend in the "d":"a" register pair and leaves */
	/* the quotient in "a" (lo) and the remainder in "d" (hi) */
	__asm__("divq %4" : "=a"(lo), "=d"(hi) : "0"(lo), "1"(hi), "r"(v) : "cc");
	if (r) *r = hi;
	return lo;

#elif defined(GNU128)
	/* NOTE(review): GNU128 is not defined by the platform section of */
	/* this file; presumably it is meant to be supplied by the build */
	unsigned __int128 t0 = hi;
	t0 = (t0 << 64) | lo;
	if (r) *r = t0 % v;
	return (UINT)(t0 / v);

#elif WORDSIZE==64
	UINT64 b, un1, un0, vn1, vn0, q1, q0, un32, un21, un10, rhat;
	INT64  s=0;
	b = (UINT64)1 << 32;
	if (hi >= v) {	/* overflow */
		printf("udiv2 overflow\n");
		fflush(stdout);
		abort();
	}
	/* normalize the divisor so that its top bit is set */
	s = ulz(v);
	v = v << s;
	vn1 = v >> 32;
	vn0 = v & 0xFFFFFFFF;
	/* shift the dividend by the same amount; a shift of lo by 64 */
	/* bits is undefined, so s = 0 is handled separately */
	un32 = (hi << s) | (s ? (lo >> (64 - s)) : 0);
	un10 = (lo << s);
	un1 = un10 >> 32;
	un0 = un10 & 0xFFFFFFFF;
	/* first quotient digit, estimate then correct */
	q1 = un32/vn1;
	rhat = un32 - q1*vn1;
    q1:	if (q1 >= b || q1*vn0 > b*rhat + un1) {
		q1 = q1 - 1;
		rhat = rhat + vn1;
		if (rhat < b) goto q1;
	}
	/* second quotient digit, estimate then correct */
	un21 = un32*b + un1 - q1*v;
	q0 = un21/vn1;
	rhat = un21 - q0*vn1;
    q0:	if (q0 >= b || q0*vn0 > b*rhat + un0) {
		q0 = q0 - 1;
		rhat = rhat + vn1;
		if (rhat < b) goto q0;
	}
	/* undo the normalization on the remainder */
	if (r) *r = (un21*b + un0 - q0*v) >> s;
	return q1*b + q0;
#else
	#error port udiv2
#endif
}


/* reciprocal of a divisor d, which must be shifted up (normalized) */
/* computes the quotient of the double word (all ones : -d-1) by d */
/* the most recent (d, result) pair is cached in static variables */
UINT nreciprocal(UINT d)
{
	static UINT D = 0;
	static UINT Q = 0;
	UINT low, high;
	if (d != D) {
		low = -1;
		high = -d-1;
		Q = udiv2(low,high,d,0);
		D = d;
	}
	return Q;
}


/* 2/1 division from Moller and Granlund 2011 */
/* divides u0 + u1*2^B by d using a precomputed reciprocal */
/* d is a normalized divisor with its top bit set and v = nreciprocal(d) */
/* returns the quotient, the remainder is stored in R unless R is null */
UINT ndiv2(UINT u0, UINT u1, UINT d, UINT v, UINT *R)
{
	UINT low, quot, rem;
	/* quotient estimate (quot:low) = u1*v + (u1:u0), plus one */
	low = umul2(u1,v,&quot);
	low += u0;
	quot += u1 + (low < u0) + 1;
	rem = u0 - quot*d;
	/* at most two corrections are made */
	if (rem > low) {
		quot--;
		rem += d;
	}
	if (rem >= d) {
		quot++;
		rem -= d;
	}
	if (R) *R = rem;
	return quot;
}


/* a * b mod c, dividing the double word product directly by c */
UINT umulmod(UINT a, UINT b, UINT c)
{
	UINT low, high, rem;
	low = umul2(a,b,&high);
	udiv2(low,high,c,&rem);
	return rem;
}

/* a * b mod c using a precomputed reciprocal, where */
/* u = c << w is normalized, v = nreciprocal(u), */
/* and both a < c and b < c */
UINT nmulmod(UINT a, UINT b, UINT u, UINT v, UINT w)
{
	UINT low, high, rem;
	/* (low:high) = (a*b) << w */
	low = umul2(a << w, b, &high);
	ndiv2(low,high,u,v,&rem);
	/* the remainder is scaled by 2^w as well */
	return rem >> w;
}


/* a^b mod c by the binary method, where */
/* u = c << w is normalized and v = nreciprocal(u) */
UINT npowmod(UINT a, UINT b, UINT u, UINT v, UINT w)
{
	UINT base, acc;
	base = a % (u >> w);
	acc = (b & 1) ? base : 1;
	/* remaining exponent bits, low to high */
	for (b >>= 1; b; b >>= 1) {
		base = nmulmod(base,base,u,v,w);
		if (b & 1) acc = nmulmod(acc,base,u,v,w);
	}
	return acc;
}


/* a^b mod c: normalizes the modulus and calls npowmod */
UINT upowmod(UINT a, UINT b, UINT c)
{
	UINT shift, norm, recip;
	shift = ulz(c);
	norm = c << shift;
	recip = nreciprocal(norm);
	return npowmod(a,b,norm,recip,shift);
}


/* a+b mod c where a and b are already reduced modulo c */
UINT uaddmod(UINT a, UINT b, UINT c)
{
	UINT sum = a+b;
	/* a >= c-b means the true sum reaches c */
	if (a >= c-b) sum -= c;
	return sum;
}


/* a-b mod c where a and b are already reduced modulo c */
UINT usubmod(UINT a, UINT b, UINT c)
{
	UINT diff = a-b;
	/* the difference wrapped around, add the modulus back */
	if (a < b) diff += c;
	return diff;
}


/* time in seconds as reported by clock(), used for timing runs */
double realtime()
{
	return (double)clock() / CLOCKS_PER_SEC;
}


/*	F4 Algorithm

	Monomials are integers referring to exponent vectors stored in the f4_monom block of memory.
	EXPVEC(m) returns the start of the exponent vector and COLUMN(m) returns the column number.
	EXPVEC(0) is scratch space.  Monomials are added to a hash table to make them unique.

	Polynomials have an array of monomials and an array of coefficients plus other information.

	Matrix rows have an array of coefficients (not duplicated) and an array of indices encoded as follows:
	it first records an index using an entire word, this is followed by a sequence of unsigned characters
	recording differences from 1 to 255 from the previous index, ending in the zero character.
*/

/* a polynomial or a matrix row; a polynomial uses cof and mon, */
/* an encoded matrix row uses cof together with ind and siz */
typedef struct f4row {
	INT   len;	/* number of terms in the matrix row */
	INT   fac;	/* monomial cofactor from the syzygy */
	INT  *cof;	/* an array of coefficients modulo p */
	INT  *mon;	/* array of monomials from the basis */
	CHAR *ind;	/* array of column index differences */
	INT   siz;	/* bytes of encoded sparsity pattern */
} f4row;

/* a pair of basis polynomials together with the monomial to cancel */
typedef struct f4syz {
	INT   lcm;	/* the lead monomial to be cancelled */
	f4row *row0;	/* pointers to the basis polynomials */
	f4row *row1;
} f4syz;

/* global state shared by the F4 and FGLM routines */
INT	f4_prime;		/* the current prime */
INT	f4_nvars;		/* total # variables */
INT	f4_nelim;		/* # for elimination */

/* f4_table holds 2*f4_tsize words: hash values in the first half */
/* and the matching monomial numbers in the second half */
INT    *f4_table, f4_tsize;	/* hash table & size */
INT    *f4_monom, f4_mload;	/* array & num. used */
INT	f4_mutex;		/* hash & array lock */

f4row **f4_basis;		/* basis polynomials */
INT     f4_bload, f4_bsize;	/* num. used / total */

f4syz **f4_pairs;		/* array of syzygies */
INT     f4_pload, f4_psize;	/* num. used / total */

f4row **f4_extra;		/* extra polynomials */
INT	f4_eload, f4_esize;	/* num. used / total */

f4row **f4_array;		/* big sparse matrix */
INT	f4_aload, f4_asize;	/* rows used / total */

INT    *f4_mused;		/* monomials present */
INT	f4_uload, f4_usize;	/* cols used / total */

INT	f4_limit = 2048;	/* select pair limit */
INT	f4_dprev = 0;		/* previous pair deg */



/* monomial exponent, column index */
/* monomial m owns f4_nvars+2 consecutive words of f4_monom: the */
/* exponent vector, then the COLUMN word, then the LEXCOL word. */
/* COLUMN and LEXCOL expand to lvalues.  Arguments and expansions */
/* are parenthesized so that the macros are safe in any expression; */
/* do not pass an argument that can reallocate f4_monom. */
#define EXPVEC(m) (f4_monom + (m)*(f4_nvars+2))
#define COLUMN(m) ((f4_monom + (m)*(f4_nvars+2))[f4_nvars])
#define LEXCOL(m) ((f4_monom + (m)*(f4_nvars+2))[f4_nvars+1])

/* top bit of a word, or'ed into hash values before they are halved */
/* the shift is done unsigned to avoid shifting into the sign bit */
#define HASHVAL  (I(U(1) << (WORDSIZE-1)))

/* normalize x with -p <= x < p to 0 <= x < p */
/* x must be a signed lvalue, it is evaluated more than once */
#define NORMAL(x,p) ((x) += ((x) >> (WORDSIZE-1)) & (p))


/*
	monomial hash table
*/

/* set up the monomial hash table and monomial storage */
/* nvars is the number of variables, nelim the number of */
/* elimination variables (clamped to the range 0..nvars) */
void f4_mon_init(INT nvars, INT nelim)
{
	INT width, slots, t;

	if (nelim > nvars) nelim = nvars;
	if (nelim < 0) nelim = 0;

	f4_nelim = nelim;
	f4_nvars = nvars;

	/* words per monomial: exponents, COLUMN, LEXCOL */
	width = f4_nvars+2;

	/* the table size must be a power of two */
	slots = I(1) << 10;
	f4_tsize = slots;
	f4_table = malloc(2*slots*sizeof(INT));
	f4_monom = malloc(width*slots/2*sizeof(INT));

	/* monomial 0 is reserved as scratch space */
	f4_mload = 1;

	t = 0;
	while (t < 2*slots) f4_table[t++] = 0;
	t = 0;
	while (t < width*slots/2) f4_monom[t++] = 0;
	f4_mutex = 0;
}

/* release the hash table and monomial storage and reset the */
/* associated globals (f4_nelim is left as it was) */
void f4_mon_free()
{
	free(f4_monom);
	free(f4_table);
	f4_monom = 0;
	f4_table = 0;
	f4_mload = 0;
	f4_tsize = 0;
	f4_mutex = 0;
	f4_nvars = 0;
}

/* clear the hash table and insert every stored monomial again */
/* (monomial 0 is scratch space and is not inserted) */
void f4_mon_rehash()
{
	INT *ev, nv, size, hash, slot, probe, t, id;
	nv = f4_nvars;
	size = f4_tsize;
	for (t=0; t < 2*size; t++) f4_table[t] = 0;
	for (id=1; id < f4_mload; id++) {
		ev = EXPVEC(id);
		/* the hash is forced to be positive and non-zero */
		hash = 1;
		for (t=0; t < nv; t++) {
			hash = (uhash(hash,ev[t]) | HASHVAL) >> 1;
		}
		/* probe with increasing steps until a free slot is found */
		slot = hash;
		for (probe=0; probe < size; probe++) {
			slot = (slot+probe) & (size-1);
			if (!f4_table[slot]) break;
		}
		f4_table[slot] = hash;
		f4_table[slot+size] = id;
	}
}

/* double the size of the hash table and of the monomial */
/* storage, then rebuild the table from the stored monomials */
void f4_mon_resize()
{
	INT width, size;
	width = f4_nvars+2;
	size = 2*f4_tsize;
	f4_tsize = size;
	f4_table = realloc(f4_table, 2*size*sizeof(INT));
	f4_monom = realloc(f4_monom, width*size/2*sizeof(INT));
	f4_mon_rehash();
}


/* 
	monomial operations
*/

/* return the monomial number for the exponent vector e, */
/* creating a new monomial if it is not in the hash table; */
/* a new monomial has its COLUMN word set to zero and the */
/* storage is enlarged when half of the table size is used */
INT f4_mon_new(INT *e)
{
	INT *tab, *ev, nv, size, hash, slot, probe, t, id;
	tab = f4_table;
	size = f4_tsize;
	nv = f4_nvars;

	hash = 1;
	for (t=0; t < nv; t++) {
		hash = (uhash(hash,e[t]) | HASHVAL) >> 1;
	}

	slot = hash;
	for (probe=0; probe < size; probe++) {
		slot = (slot+probe) & (size-1);
		if (tab[slot] == 0) break;
		if (tab[slot] == hash) {
			/* same hash, compare the exponents */
			id = tab[size+slot];
			ev = EXPVEC(id);
			t = 0;
			while (t < nv && e[t]==ev[t]) t++;
			if (t == nv) return id;
		}
	}

	/* not found, store it in the free slot */
	id = f4_mload++;
	tab[slot] = hash;
	tab[size+slot] = id;
	ev = EXPVEC(id);
	for (t=0; t < nv; t++) ev[t] = e[t];
	ev[t] = 0;
	if (id+1 == size/2) f4_mon_resize();
	return id;
}

/* compare two monomials: negative, zero or positive result */
/* when A is less than, equal to, or greater than B */
/* if every variable is an elimination variable the order is */
/* pure lexicographic, otherwise the elimination variables and */
/* the remaining variables form two blocks that are compared in */
/* turn by total degree and then by reverse lexicographic order */
INT f4_mon_cmp(INT A, INT B)
{
	INT *ea, *eb, nv, ne, lo, hi, t, sa, sb;
	if (A == B) return 0;
	ea = EXPVEC(A);
	eb = EXPVEC(B);
	nv = f4_nvars;
	ne = f4_nelim;

	if (ne >= nv) {
		/* pure lex order */
		for (t=0; t < nv; t++) {
			if (ea[t] != eb[t]) return (ea[t] - eb[t]);
		}
		return 0;
	}

	/* first block, or the only block when ne is zero */
	lo = 0;
	hi = ne ? ne : nv;
	for (;;) {
		/* degree of the block */
		sa = sb = 0;
		for (t=lo; t < hi; t++) {
			sa += ea[t];
			sb += eb[t];
		}
		if (sa != sb) return (sa - sb);
		/* ties are broken from the last variable down; the first */
		/* variable of the block is implied by the equal degrees */
		for (t=hi-1; t > lo; t--) {
			if (ea[t] != eb[t]) return (eb[t] - ea[t]);
		}
		if (hi == nv) return 0;
		lo = ne;
		hi = nv;
	}
}

/* total degree: the sum of all the exponents of A */
INT f4_mon_deg(INT A)
{
	INT *ev, total, t;
	ev = EXPVEC(A);
	total = 0;
	t = 0;
	while (t < f4_nvars) total += ev[t++];
	return total;
}

/* division test: returns 1 if B divides A, otherwise 0 */
INT f4_mon_div(INT A, INT B)
{
	INT *num, *den, nv, t;
	nv = f4_nvars;
	num = EXPVEC(A);
	den = EXPVEC(B);
	t = 0;
	while (t < nv) {
		if (num[t] < den[t]) break;
		t++;
	}
	return (t == nv);
}

/* product of two monomials: exponents are added in the */
/* scratch monomial, which is then looked up or created */
INT f4_mon_mul(INT A, INT B)
{
	INT *x = EXPVEC(A);
	INT *y = EXPVEC(B);
	INT *out = EXPVEC(0);
	INT t;
	for (t=0; t < f4_nvars; t++) {
		out[t] = x[t] + y[t];
	}
	return f4_mon_new(out);
}

/* quotient A/B: exponents are subtracted in the scratch */
/* monomial; no check is made that B divides A */
INT f4_mon_quo(INT A, INT B)
{
	INT *x = EXPVEC(A);
	INT *y = EXPVEC(B);
	INT *out = EXPVEC(0);
	INT t;
	for (t=0; t < f4_nvars; t++) {
		out[t] = x[t] - y[t];
	}
	return f4_mon_new(out);
}

/* greatest common divisor: the minimum of each exponent */
INT f4_mon_gcd(INT A, INT B)
{
	INT *x = EXPVEC(A);
	INT *y = EXPVEC(B);
	INT *out = EXPVEC(0);
	INT t;
	for (t=0; t < f4_nvars; t++) {
		if (x[t] > y[t]) out[t] = y[t];
		else out[t] = x[t];
	}
	return f4_mon_new(out);
}

/* least common multiple: the maximum of each exponent */
INT f4_mon_lcm(INT A, INT B)
{
	INT *x = EXPVEC(A);
	INT *y = EXPVEC(B);
	INT *out = EXPVEC(0);
	INT t;
	for (t=0; t < f4_nvars; t++) {
		if (x[t] > y[t]) out[t] = x[t];
		else out[t] = y[t];
	}
	return f4_mon_new(out);
}

/* returns 1 if A and B are relatively prime, that is */
/* no variable has a non-zero exponent in both of them */
INT f4_mon_prm(INT A, INT B)
{
	INT *x = EXPVEC(A);
	INT *y = EXPVEC(B);
	INT t = 0;
	while (t < f4_nvars) {
		if (x[t] != 0 && y[t] != 0) return 0;
		t++;
	}
	return 1;
}

/* returns 1 if every variable that occurs in B also */
/* occurs in A, otherwise 0 */
INT f4_mon_dep(INT A, INT B)
{
	INT *x = EXPVEC(A);
	INT *y = EXPVEC(B);
	INT t = 0;
	while (t < f4_nvars) {
		if (y[t] != 0 && x[t] == 0) return 0;
		t++;
	}
	return 1;
}

/* the monomial x[k]: exponent one in position k and zero */
/* elsewhere (if k is out of range this is the constant) */
INT f4_mon_var(INT k)
{
	INT *ev = EXPVEC(0);
	INT nv = f4_nvars;
	INT t;
	for (t=0; t < nv; t++) ev[t] = 0;
	if (k >= 0 && k < nv) ev[k] = 1;
	return f4_mon_new(ev);
}

/* the constant monomial with all exponents equal to zero */
INT f4_mon_one()
{
	INT *ev = EXPVEC(0);
	INT t = 0;
	while (t < f4_nvars) ev[t++] = 0;
	return f4_mon_new(ev);
}

/* print a monomial for debugging, as factors *x<i>^<e> */
/* a negative monomial number is printed with a leading underscore */
/* and *1 is printed when the exponents sum to zero */
void f4_mon_print(INT A)
{
	INT *ev, t, total;
	if (A < 0) {
		printf("_");
		A = -A;
	}
	ev = EXPVEC(A);
	total = 0;
	for (t=0; t < f4_nvars; t++) {
		total += ev[t];
		if (ev[t] != 0) {
			printf("*x%lld^%lld",(long long int)t,(long long int)ev[t]);
		}
	}
	if (total == 0) printf("*1");
}

/* sort the array L of l monomials into increasing order */
/* with respect to f4_mon_cmp, using a shell sort whose gap */
/* shrinks by roughly 5/13 on every pass and ends with gap 1 */
void f4_mon_sort(INT *L, INT l)
{
	INT gap, pos, hole, cur;
	if (l < 2) return;
	gap = l;
	do {
		gap = 5*(gap+1)/13;
		for (pos=gap-1; pos < l; pos++) {
			cur = L[pos];
			hole = pos;
			/* shift entries that are not smaller than cur */
			while (hole >= gap && f4_mon_cmp(cur,L[hole-gap]) <= 0) {
				L[hole] = L[hole-gap];
				hole -= gap;
			}
			L[hole] = cur;
		}
	} while (gap > 1);
}


/*
	polynomial operations
*/

/* allocate a row with every field set to zero */
/* returns a null pointer if the allocation fails (the previous */
/* version would have called memset on the null pointer) */
f4row * f4row_new()
{
	return calloc(1, sizeof(f4row));
}

/* free a row together with its cof, mon and ind arrays */
/* a null row is ignored */
void f4row_free(f4row *a)
{
	if (a) {
		free(a->ind);
		free(a->mon);
		free(a->cof);
		free(a);
	}
}

/* sort the terms of a polynomial into decreasing monomial */
/* order, moving the coefficients along with the monomials */
/* uses the same shell sort as f4_mon_sort and returns a */
f4row * f4row_sort(f4row *a)
{
	INT gap, pos, hole, len, mon, cof;
	len = a->len;
	if (len < 2) return a;
	gap = len;
	do {
		gap = 5*(gap+1)/13;
		for (pos=gap-1; pos < len; pos++) {
			mon = a->mon[pos];
			cof = a->cof[pos];
			hole = pos;
			/* shift terms that are smaller than the current one */
			while (hole >= gap && f4_mon_cmp(a->mon[hole-gap],mon) < 0) {
				a->mon[hole] = a->mon[hole-gap];
				a->cof[hole] = a->cof[hole-gap];
				hole -= gap;
			}
			a->mon[hole] = mon;
			a->cof[hole] = cof;
		}
	} while (gap > 1);
	return a;
}

/* print a row for debugging */
/* an encoded row (ind is set) is printed as +coefficient[column], */
/* otherwise the cofactor (if non-zero) and the terms are printed */
void f4row_print(f4row *a)
{
	INT term, pos, col;
	if (a->ind) {
		term = pos = 0;
		while (term < a->len) {
			/* a full index starts each run */
			col = *(INT *)(a->ind+pos);
			pos += sizeof(INT);
			for (;;) {
				printf("+%lld[%lld]",(long long int)(a->cof[term]),(long long int)col);
				term++;
				/* a null byte ends the run */
				if (!a->ind[pos]) {
					pos++;
					break;
				}
				/* otherwise step down by the difference */
				col = col - a->ind[pos];
				pos++;
			}
		}
		printf("\n");
		return ;
	}
	if (a->fac) {
		f4_mon_print(a->fac);
		printf(" : ");
	}
	for (term=0; term < a->len; term++) {
		printf("+%lld",(long long int)(a->cof[term]));
		f4_mon_print(a->mon[term]);
	}
	printf("\n");
}

/* append a row to the matrix f4_array, growing the array by */
/* half when it is full.  The new capacity is forced to hold at */
/* least one more row, because 3*size/2 does not grow a capacity */
/* of 0 or 1 and the capacity is reset to 0 elsewhere in this file. */
void f4_addrow(f4row *a)
{
	INT size;
	if (f4_aload + 1 > f4_asize) {
		size = 3*f4_asize/2;
		if (size < f4_aload + 1) size = f4_aload + 1;
		f4_asize = size;
		f4_array = realloc(f4_array, f4_asize*sizeof(f4row *));
	}
	f4_array[f4_aload] = a;
	f4_aload++;
}

/* append a polynomial to the basis f4_basis, growing the array */
/* by half when it is full.  The new capacity is forced to hold at */
/* least one more entry, because 3*size/2 does not grow a capacity */
/* of 0 or 1 and the capacity is reset to 0 elsewhere in this file. */
void f4_insert(f4row *a)
{
	INT size;
	if (f4_bload + 1 > f4_bsize) {
		size = 3*f4_bsize/2;
		if (size < f4_bload + 1) size = f4_bload + 1;
		f4_bsize = size;
		f4_basis = realloc(f4_basis, f4_bsize*sizeof(f4row *));
	}
	f4_basis[f4_bload] = a;
	f4_bload++;
}

/* encode dense vector */
/* converts the non-zero entries of vec[0..n] into a new encoded row, */
/* scanning from index n down to 0, and clears those entries of vec. */
/* returns a null pointer if n < 0 or if every entry is zero. */
/* the new row has cof, ind and siz set, with mon null and fac zero. */
f4row * f4_reduce_export(INT n, INT *vec)
{
	f4row *a = 0;
	INT i, j, k, l;
	unsigned char *buf;

	if (n < 0) return 0;
	/* worst case for the n+1 entries: a null byte and a full index */
	/* each, plus the final null.  The previous size n*sizeof(INT)+n */
	/* was too small when n is 0 or 1. */
	buf = malloc((n+1)*(sizeof(INT)+1)+1);
	j = k = 0; l = -1;
	for (i=n; i >= 0; i--) {
		if (vec[i]==0) continue;
		if (l == -1) {
			/* first index is stored in full */
			*(INT *)(buf+k) = i;
			k += sizeof(INT);
		}
		else if (l-i <= 255) {
			/* difference from the previous index */
			buf[k++] = (unsigned char)(l-i);
		}
		else {
			/* gap too large: end the run, store a full index */
			buf[k++] = 0;
			*(INT *)(buf+k) = i;
			k += sizeof(INT);
		}
		l = i;
		j++;
	}
	if (!j) goto done;

	buf[k++] = 0;	/* extra null */
	a = malloc(sizeof(f4row));
	a->len = j;
	a->fac = 0;
	a->cof = malloc(j*sizeof(INT));
	/* copy the coefficients in the same order and clear vec */
	for (j=0, i=n; i >= 0; i--) {
		if (vec[i]==0) continue;
		a->cof[j++] = vec[i];
		vec[i] = 0;
	}
	a->mon = 0;
	a->ind = malloc(k*sizeof(char));
	memcpy(a->ind,buf,k);
	a->siz = k;	/* was left uninitialized */

done:	free(buf);
	return a;
}

/* add a*c to dense vector */
/* a must be an encoded row (ind set); every coefficient of a is */
/* multiplied by c and added to the entry of vec at its column. */
/* returns the first index stored in a->ind, or -1 if a has no terms. */
INT f4_reduce_import(INT *vec, f4row *a, INT c)
{
	UINT u, v, w;
	INT  x, y, z, i, j, k, l, p, p2;
	p = f4_prime;
	if (!a->len) return -1;
	if (WORDSIZE==64 && p <= 2147483647) {
		/* small prime on a 64-bit machine: products are not reduced */
		/* mod p here, the sum is only brought back below p*p */
		/* (assumes entries of vec and the products are in 0..p*p-1) */
		i = j = k = 0; p2 = p*p;
		while (j < a->len) {
			/* a full index starts each run of differences */
			z = *(INT *)(a->ind+k); k += sizeof(INT);
		mul:	x = (a->cof[j]) * c;
			y = U(vec[z]) + U(x) - U(p2);
			NORMAL(y,p2);
			vec[z] = y;
			l = z, j++;
			/* a non-zero byte is the distance down to the next column, */
			/* a null byte ends the run */
			if (a->ind[k]) {
				z = l - a->ind[k]; k++;
				goto mul;
			}
			else k++;
		}
	}
	else {	
		/* general case: reduce each product with nmulmod */
		w = ulz(p); u = p << w; v = nreciprocal(u);
		i = j = k = 0;
		while (j < a->len) {
			z = *(INT *)(a->ind+k); k += sizeof(INT);
		mulmod:	x = nmulmod(a->cof[j],c,u,v,w);
			y = U(vec[z]) + U(x) - U(p);
			NORMAL(y,p);
			vec[z] = y;
			l = z, j++;
			if (a->ind[k]) {
				z = l - a->ind[k]; k++;
				goto mulmod;
			}
			else k++;
		}
	}
	return *(INT *)(a->ind);
}

/* reduce vec using pivots */
/* scans vec from index n down to 0; for each non-zero entry vec[i] */
/* with a pivot row piv[i], that entry times the pivot row is */
/* subtracted from vec.  Entries without a pivot are kept. */
/* (presumably each pivot row is monic with leading column i, so */
/* that the subtraction cancels vec[i] - confirm against callers) */
/* always returns n. */
INT f4_reduce_vector(INT n, INT *vec, f4row **piv)
{
	f4row *a;
	UINT u, v, w;
	INT  x, y, z, i, j, k, l, c, t, p, p2;
	p = f4_prime;
	if (WORDSIZE==64 && p <= 2147483647) {
		/* small prime on a 64-bit machine: entries are kept below */
		/* p*p and reduced mod p only when they are visited */
		p2 = p*p;
		for (i=n; i >= 0; i--) {
			c = vec[i]; if (!c) continue;
			/* the reduced value is stored back even without a pivot */
			c = vec[i] = c % p;
			a = piv[i]; if (!a) continue;
			j = k = 0;
			while (j < a->len) {
				/* a full index starts each run of differences */
				z = *(INT *)(a->ind+k); k += sizeof(INT);
			mul:	x = (a->cof[j++]) * c;
				y = U(vec[z]) - U(x);
				/* t is the distance to the next column, 0 ends the run */
				t = a->ind[k++];
				NORMAL(y,p2);
				vec[z] = y;
				z = z-t;
				if (t) goto mul;
			}
		}
	}
	else {
		/* general case: reduce each product with nmulmod */
		w = ulz(p); u = p << w; v = nreciprocal(u);
		for (i=n; i >= 0; i--) {
			c = vec[i]; if (!c) continue;
			a = piv[i]; if (!a) continue;
			j = k = 0;
			while (j < a->len) {
				z = *(INT *)(a->ind+k); k += sizeof(INT);
			mul2:	x = nmulmod(a->cof[j++],c,u,v,w);
				y = U(vec[z]) - U(x);
				t = a->ind[k++];
				NORMAL(y,p);
				vec[z] = y;
				z = z-t;
				if (t) goto mul2;
			}
		}
	}
	return n;
}

/* make a row monic so it can be used as a pivot: every */
/* coefficient is multiplied by the inverse of the first */
/* coefficient modulo the current prime; returns the row */
f4row * f4_reduce_monic(f4row *a)
{
	UINT norm, recip, shift;
	INT  prime, inv, t;
	prime = f4_prime;
	inv = uinvmod(a->cof[0],prime);
	/* normalized modulus and reciprocal for nmulmod */
	shift = ulz(prime);
	norm = prime << shift;
	recip = nreciprocal(norm);
	t = 0;
	while (t < a->len) {
		a->cof[t] = nmulmod(a->cof[t],inv,norm,recip,shift);
		t++;
	}
	return a;
}

/* initialize the global state for a computation modulo p with */
/* n variables of which e are elimination variables: resets the */
/* pair selection settings, sets up the monomial table, and */
/* allocates the four pointer arrays with room for 30 entries */
void f4mod_init(INT n, INT e, INT p)
{
	f4_prime = p;
	f4_limit = 2048;
	f4_dprev = 0;
	f4_mon_init(n,e);

	f4_aload = 0;
	f4_bload = 0;
	f4_pload = 0;
	f4_eload = 0;

	f4_asize = 30;
	f4_bsize = 30;
	f4_psize = 30;
	f4_esize = 30;

	f4_array = malloc(f4_asize*sizeof(f4row *));
	f4_basis = malloc(f4_bsize*sizeof(f4row *));
	f4_pairs = malloc(f4_psize*sizeof(f4syz *));
	f4_extra = malloc(f4_esize*sizeof(f4row *));
}

/* free the rows held in the matrix, basis and extra arrays, */
/* the pair records, and the arrays themselves, then reset the */
/* counters, the monomial table and the prime */
void f4mod_free()
{
	INT t;
	t = 0;
	while (t < f4_aload) f4row_free(f4_array[t++]);
	t = 0;
	while (t < f4_bload) f4row_free(f4_basis[t++]);
	t = 0;
	while (t < f4_eload) f4row_free(f4_extra[t++]);
	t = 0;
	while (t < f4_pload) free(f4_pairs[t++]);

	free(f4_array);
	free(f4_basis);
	free(f4_pairs);
	free(f4_extra);

	f4_aload = 0; f4_asize = 0;
	f4_bload = 0; f4_bsize = 0;
	f4_pload = 0; f4_psize = 0;
	f4_eload = 0; f4_esize = 0;

	f4_mon_free();
	f4_prime = 0;
}


/*
	FGLM Algorithm

	The FGLM algorithm extends the code for F4.
*/

/* global state of the FGLM routines; f4_tperm lists the */
/* indices of the f4_tvars target variables in the order in */
/* which fglm_nextmon increments them */
f4row **f4_lexgb;		/* lexicographic GB */
INT	f4_lload, f4_lsize;	/* num used / total */
INT    *f4_tperm, f4_tvars;	/* target variables */


/* next target monomial after m */
/* tries each target variable in turn: its exponent is increased */
/* by one and the exponents of the earlier target variables are */
/* reset to zero; the first candidate that is not divisible by a */
/* lead monomial of f4_lexgb is returned.  returns 0 if there is */
/* no such candidate. */
INT fglm_nextmon(INT m)
{
	INT *cand, *src, *lead, nv, lvl, t, g, q;

	nv = f4_nvars;
	cand = EXPVEC(0);
	src = EXPVEC(m);
	for (t=0; t < nv; t++) cand[t] = src[t];

	for (lvl=0; lvl < f4_tvars; lvl++) {
		cand[f4_tperm[lvl]] += 1;
		for (t=0; t < lvl; t++) cand[f4_tperm[t]] = 0;

		/* look for a lead monomial that divides the candidate */
		for (g=0; g < f4_lload; g++) {
			lead = EXPVEC(f4_lexgb[g]->mon[0]);
			q = 0;
			while (q < nv && cand[q] >= lead[q]) q++;
			if (q == nv) break;
		}
		/* none divides it, the candidate is accepted */
		if (g == f4_lload) break;
	}
	if (lvl == f4_tvars) return 0;
	return f4_mon_new(cand);
}

/* finite dimensional */
/* returns 1 if for every variable some element of f4_basis has a */
/* lead monomial whose degree is carried entirely by that variable, */
/* otherwise 0.  The scratch monomial is used to record the flags. */
INT fglm_finite()
{
	INT d, *e, *f, i, j, n, x;
	n = f4_nvars;
	f = EXPVEC(0);
	for (i=0; i < n; i++) f[i] = 0;
	for (i=0; i < f4_bload; i++) {
		x = f4_basis[i]->mon[0];
		e = EXPVEC(x);
		d = f4_mon_deg(x);
		/* first variable whose exponent equals the total degree */
		for (j=0; j < n; j++) {
			if (e[j]==d) break;
		}
		if (j < n) f[j] = 1;
	}
	/* the bound is tested before f[i] is read */
	for (i=0; i < n && f[i]; i++);
	return (i==n);
}

/* monomial basis */
/* enumerates the monomials that are not divisible by any lead */
/* monomial of f4_basis, starting from the constant, and stores */
/* them in a newly allocated f4_mused with COLUMN set to 2 */
void fglm_mbasis()
{
	INT *cur, *lead, nv, v, t, g, q, id;

	nv = f4_nvars;
	f4_uload = 0;
	f4_usize = 32;
	f4_mused = malloc(f4_usize*sizeof(INT));

	/* exponent vector of the current monomial */
	cur = malloc(nv*sizeof(INT));
	memset(cur,0,nv*sizeof(INT));

	for (;;) {
		id = f4_mon_new(cur);
		COLUMN(id) = 2;
		f4_mused[f4_uload++] = id;

		/* step to the next monomial: starting from the last */
		/* variable, increase an exponent and clear the ones after */
		/* it, until the result is not divisible by a lead monomial */
		for (v=nv-1; v >= 0; v--) {
			cur[v]++;
			for (t=v+1; t < nv; t++) cur[t] = 0;
			for (g=0; g < f4_bload; g++) {
				lead = EXPVEC(f4_basis[g]->mon[0]);
				q = 0;
				while (q < nv && cur[q] >= lead[q]) q++;
				if (q == nv) break;
			}
			if (g == f4_bload) break;
		}
		/* every variable was tried, the enumeration is complete */
		if (v < 0) break;

		if (f4_uload == f4_usize) {
			f4_usize *= 2;
			f4_mused = realloc(f4_mused, f4_usize*sizeof(INT));
		}
	}
	free(cur);
}

/* monomial border */
/* multiplies each monomial that is in f4_mused on entry by each */
/* target variable; products whose COLUMN is still zero are */
/* appended to f4_mused and get COLUMN set to 1 */
void fglm_border()
{
	INT inner, v, t, var, prod;
	inner = f4_uload;
	for (v=0; v < f4_tvars; v++) {
		var = f4_mon_var(f4_tperm[v]);
		for (t=0; t < inner; t++) {
			prod = f4_mon_mul(f4_mused[t],var);
			if (!COLUMN(prod)) {
				/* make room, then record the new monomial */
				if (f4_uload == f4_usize) {
					f4_usize *= 2;
					f4_mused = realloc(f4_mused, f4_usize*sizeof(INT));
				}
				f4_mused[f4_uload++] = prod;
				COLUMN(prod) = 1;
			}
		}
	}
	if (info >= 3) printf("%lld solutions with %lld monomials in border\n", (long long int)inner, (long long int)f4_uload-inner);
}

/* symbolic preprocessing */
void fglm_symbol()
{
	f4row *a, *b;
	INT i, j, k, l, m, q, s, t, x, y, z;

	f4_aload = 0;
	f4_asize = s = 32;
	f4_array = malloc(s*sizeof(f4row *));

	for (l=i=0; i < f4_uload; i++) {
		x = f4_mused[i];
		if (COLUMN(x)==2) continue;
		COLUMN(x) = 2;

		for (j=0; j < f4_bload; j++) {
			y = f4_basis[j]->mon[0];
			if (f4_mon_div(x,y)) break;
		}
		if (j==f4_bload) continue;

		b = f4_basis[j];
		q = f4_mon_quo(x,y);
		a = malloc(sizeof(f4row));
		a->len = b->len;
		a->fac = q;
		a->cof = b->cof;
		a->mon = b->mon;
		a->ind = 0;

		if (f4_aload == f4_asize) {
			s = f4_asize *= 2;
			f4_array = realloc(f4_array, s*sizeof(f4row *));
		}
		f4_array[f4_aload++] = a;

		if (f4_uload + a->len > f4_usize) {
			f4_usize = s = 2*f4_usize + a->len;
			f4_mused = realloc(f4_mused, s*sizeof(INT));
		}

		l += a->len;
		for (j=1; j < a->len; j++) {
			x = f4_mon_mul(a->fac, a->mon[j]);
			if (COLUMN(x)) continue;
			COLUMN(x) = 1;
			f4_mused[f4_uload++] = x;
		}
	}
	if (info >= 3) printf("%lld x %lld with %lld non-zero, %.1f per row\n", (long long int)f4_aload, (long long int)f4_uload, (long long int)l, (double)l/f4_aload);
}

/* encode row for matrix */
/* each monomial of a is multiplied by the cofactor a->fac and */
/* replaced by a column number: LEXCOL of the product when the */
/* monomial is stored negated, otherwise COLUMN of the product. */
/* the columns are written to buf in the index/difference encoding */
/* and copied into a newly allocated a->ind, with a->siz set to */
/* the number of bytes.  buf must be large enough for the row. */
f4row * fglm_encode_row(f4row *a, unsigned char *buf)
{
	INT t, used, prev, col, mon, prod, lex;
	prev = -1;
	used = 0;
	for (t=0; t < a->len; t++) {
		mon = a->mon[t];
		lex = (mon < 0);
		/* the product is formed first since it may move the monomial storage */
		prod = f4_mon_mul(a->fac, lex ? -mon : mon);
		if (lex) col = LEXCOL(prod);
		else col = COLUMN(prod);

		if (prev != -1 && prev > col && prev-col <= 255) {
			/* store difference from previous */
			/* in sequence terminated by null */
			buf[used++] = (unsigned char)(prev-col);
		}
		else {
			/* null byte to stop the sequence, */
			/* unless this is the first index */
			if (prev != -1) buf[used++] = 0;
			/* store the index in full */
			*(INT *)(buf+used) = col;
			used += sizeof(INT);
		}
		prev = col;
	}
	buf[used++] = 0;	/* extra null */
	a->ind = malloc(used*sizeof(char));
	memcpy(a->ind,buf,used);
	a->siz = used;
	return a;
}

/* encode matrix */
/* sorts f4_mused, moves it into a new array with o zero entries */
/* in front, sets COLUMN of every monomial to its position in the */
/* new array, and encodes every row of f4_array */
void fglm_encode(INT o)
{
	f4row *row;
	INT *shifted, t, mon, bytes, terms;
	unsigned char *buf;

	/* add space before monomials */
	f4_mon_sort(f4_mused, f4_uload);
	shifted = malloc((f4_uload+o)*sizeof(INT));
	for (t=0; t < o; t++) shifted[t] = 0;
	for (t=0; t < f4_uload; t++) shifted[t+o] = f4_mused[t];
	free(f4_mused);
	f4_mused = shifted;
	f4_uload = f4_uload+o;
	f4_usize = f4_uload;

	/* assign columns, skipping the empty entries */
	for (t=0; t < f4_uload; t++) {
		mon = f4_mused[t];
		if (mon) COLUMN(mon) = t;
	}

	/* scratch buffer shared by all the rows */
	buf = malloc(f4_uload*(sizeof(INT)+1));
	bytes = terms = 0;
	for (t=0; t < f4_aload; t++) {
		row = f4_array[t];
		fglm_encode_row(row,buf);
		bytes += row->siz;
		terms += row->len;
	}
	if (info >= 3) printf("%.3f bytes per non-zero, matrix encoded in %.3f MB\n", (double)bytes/terms, (double)bytes/1024/1024);
	free(buf);
}

/* decode monomials in row */
/* allocates a->mon and fills it with the entries of f4_mused at */
/* the columns encoded in a->ind, then sets a->fac to the constant */
/* monomial.  a row with no terms is returned unchanged. */
f4row * fglm_decode_row(f4row *a)
{
	INT term, pos, col;
	if (a->len == 0) return a;
	a->mon = malloc(a->len*sizeof(INT));
	term = pos = 0;
	while (term < a->len) {
		/* a full index starts each run */
		col = *(INT *)(a->ind+pos);
		pos += sizeof(INT);
		for (;;) {
			a->mon[term] = f4_mused[col];
			term++;
			/* a null byte ends the run */
			if (!a->ind[pos]) {
				pos++;
				break;
			}
			/* otherwise step down by the difference */
			col = col - a->ind[pos];
			pos++;
		}
	}
	a->fac = f4_mon_one();
	return a;
}

/* FGLM mod p */
/* converts the basis in f4_basis into a lexicographic basis for the */
/* target variables, stored in the newly allocated array f4_lexgb. */
/* returns without doing anything if fglm_finite() fails. */
/* */
/* the matrix has the columns 0..o-1 reserved for lex monomials, */
/* recorded negated in f4_mused with their column in LEXCOL, and */
/* the columns from o upwards for the grevlex monomials.  piv holds */
/* the rows used to eliminate grevlex columns and nfm holds only */
/* the rows made by symbolic preprocessing, which are used to */
/* compute normal forms.  a reduced row whose first column is below */
/* o contains lex monomials only and becomes a basis element. */
/* */
/* the work arrays vec, buf, piv and nfm are now released before */
/* returning; the rows they point to are not freed here. */
void fglm_mod()
{
	INT *vec;
	unsigned char *buf;
	f4row *b, *c, **piv, **nfm;
	INT d, e, i, j, k, l, m, n, o, p, s, t, u, v, w;
	double t0, t1;

	n = f4_nvars;
	e = f4_nelim;
	p = f4_prime;
	t = f4_tvars;
	if (info >= 3) printf("FGLM in %lld/%lld variables mod p=%lld\n", (long long int)t, (long long int)n, (long long int)p);

	t0 = realtime();
	if (!fglm_finite()) goto fail;
	/* o = number of monomials in the monomial basis plus one */
	fglm_mbasis(); o = f4_uload+1;
	fglm_border();
	fglm_symbol();
	fglm_encode(o);

	/* output */
	f4_lload = 0;
	f4_lsize = s = 32;
	f4_lexgb = malloc(s*sizeof(f4row *));

	/* initialize buffer and pivots */
	vec = malloc(f4_uload*sizeof(INT));
	memset(vec,0,f4_uload*sizeof(INT));
	buf = malloc(f4_uload*(sizeof(INT)+1));
	memset(buf,0,f4_uload*(sizeof(INT)+1));
	piv = malloc(f4_uload*sizeof(f4row *));
	memset(piv,0,f4_uload*sizeof(f4row *));
	nfm = malloc(f4_uload*sizeof(f4row *));
	memset(nfm,0,f4_uload*sizeof(f4row *));
	/* each matrix row is filed under its first column */
	for (i=0; i < f4_aload; i++) {
		b = f4_array[i];
		k = *(INT *)(b->ind);
		piv[k] = b;
		nfm[k] = b;
	}
	free(f4_array); f4_array = 0;
	f4_asize = f4_aload = 0;

	/* first monom */
	m = f4_mon_one(); l = 0;
	f4_mused[l] = -m;
	LEXCOL(m) = l++;

	b = malloc(sizeof(f4row));
	b->mon = malloc(2*sizeof(INT));
	b->cof = malloc(2*sizeof(INT));
	b->ind = 0; b->fac = m;
	b->len = 2; b->siz = 0;

	/* 1 (grevlex) = 1 (lex) */
	b->cof[0] = 1; b->cof[1] =  1;
	b->mon[0] = m; b->mon[1] = -m;

	/* t is the interval between progress reports */
	t = uroot(o-1,2)+1;
	v = f4_mon_var(f4_tperm[0]);

	while (1) {
		/* find the monic univariate polynomial in v */
		/* use one reduced equation to make the next */
		if ((l % t)==0) printf("%.1f/", 100.0*l/(o-1));

		/* reduce b with piv */
		fglm_encode_row(b,buf);
		s = f4_reduce_import(vec,b,1);
		f4_reduce_vector(s,vec,piv);
		b = f4_reduce_export(s,vec);
		b = f4_reduce_monic(b);
		b = fglm_decode_row(b);

		/* only lex monom? */
		k = *(INT *)(b->ind);
		if (k < o) break;
		piv[k] = b;

		/* next monomial */
		m = f4_mon_mul(m,v);
		f4_mused[l] = -m;
		LEXCOL(m) = l++;

		/* multiply reduced row */
		/* c shares the cof and mon arrays of the pivot row b */
		c = malloc(sizeof(f4row));
		c->len = b->len;
		c->cof = b->cof;
		c->mon = b->mon;
		c->ind = 0;
		c->fac = v;
		c->siz = 0;
		b = c;
	}

poly:	/* fix up monomials and add to lex basis */
	for (j=0; j < b->len; j++) b->mon[j] *= -1;
	if (f4_lload == f4_lsize) {
		s = f4_lsize *= 2;
		f4_lexgb = realloc(f4_lexgb, s*sizeof(f4row *));
	}
	f4_lexgb[f4_lload++] = b;
	/* the lead monomial of a basis element gets no lex column */
	l--;

	/* w is the monomial whose normal form is held in c */
	w = m;
	while (m = fglm_nextmon(m)) {
		f4_mused[l] = -m;
		LEXCOL(m) = l++;
		if ((l % t)==0 && l < o-1) printf("%.1f/", 100.0*l/(o-1));

		if (f4_mon_div(m,w)) {
			/* scale previous nf */
			c->fac = f4_mon_quo(m,w);
			c = fglm_encode_row(c,buf);
			s = f4_reduce_import(vec,c,1);
			f4row_free(c);
			f4_reduce_vector(s,vec,nfm);
			c = f4_reduce_export(s,vec);
			c = fglm_decode_row(c);
			free(c->ind); c->ind = 0; c->siz = 0;
			b = c;
		}
		else {
			/* compute nf */
			/* start from 1 and multiply by one variable at a time, */
			/* reducing with the nfm rows after every step */
			u = f4_mon_one();
			b = malloc(sizeof(f4row));
			b->mon = malloc(2*sizeof(INT));
			b->cof = malloc(2*sizeof(INT));
			b->ind = 0; b->fac = u;
			b->len = 2; b->siz = 0;
			b->cof[0] = 1; b->mon[0] =  u;
			b->cof[1] = 1; b->mon[1] = -u;
			for (j=0; j < f4_nvars; j++) {
				d = EXPVEC(m)[j];
				if (!d) continue;
				u = f4_mon_var(j);
				for (k=0; k < d; k++) {
					b->fac = u;
					b = fglm_encode_row(b,buf);
					s = f4_reduce_import(vec,b,1);
					f4row_free(b);
					f4_reduce_vector(s,vec,nfm);
					b = f4_reduce_export(s,vec);
					b = fglm_decode_row(b);
					free(b->ind); b->ind = 0; b->siz = 0;
				}
			}
		}

		/* copy the normal form */
		c = malloc(sizeof(f4row));
		memcpy(c,b,sizeof(f4row));

		/* reduce b with piv */
		fglm_encode_row(b,buf);
		s = f4_reduce_import(vec,b,1);
		f4_reduce_vector(s,vec,piv);
		b = f4_reduce_export(s,vec);
		b = f4_reduce_monic(b);
		b = fglm_decode_row(b);
		k = *(INT *)(b->ind);
		if (k < o) goto poly;
		piv[k] = b;
		w = m;
	}
	if (info >= 3) printf("100.0/\n");

	t1 = realtime();
	printf("%lld basis elements, %.3f sec\n", (long long int)f4_lload, t1-t0);

	/* release the work arrays (not allocated on the fail path) */
	free(vec);
	free(buf);
	free(piv);
	free(nfm);
fail:	return;
}


/*
	import polynomials
*/

/* capacity of the two name tables below and of the exponent */
/* scratch array that getmon keeps on its stack */
#define MAXVARS 1024
/* variable names, filled in by main from the -v option */
char * vars[MAXVARS] = {0};
/* names from the -l option; main copies vars here when -l is absent */
char * lvar[MAXVARS] = {0};

/*
	get integer

	Reads the run of decimal digits at the start of s and returns its value.
	The number of digits consumed is stored in *l; it is 0 (and the result
	is 0) when s does not begin with a digit.  No sign is accepted and the
	value is accumulated in an INT without any overflow check.
*/
INT getint(char *s, int *l)
{
	char *q = s;
	INT val = 0;

	/* NUL is not a digit, so this also stops at the end of the string */
	while ('0' <= *q && *q <= '9') {
		val = 10*val + (*q - '0');
		q++;
	}
	*l = (int)(q - s);
	return val;
}

/*
	get variable

	Reads a variable name from the start of s.  A name is made of ASCII
	letters, underscores and digits, and it can not begin with a digit.

	On success a freshly malloc'd, NUL terminated copy of the name is
	returned (the caller frees it) and its length is stored in *l.
	If s does not begin with a name, or memory runs out, the function
	returns 0 and stores 0 in *l.
*/
char * getvar(char *s, int *l)
{
	char *v;
	int i;

	*l = 0;
	for (i=0; s[i]; i++) {
		/* test each class on its own: the range '_'..'z' */
		/* would also let the backtick (0x60) through */
		if (s[i] == '_') continue;
		if ('a' <= s[i] && s[i] <= 'z') continue;
		if ('A' <= s[i] && s[i] <= 'Z') continue;
		if ('0' <= s[i] && s[i] <= '9' && i > 0) continue;
		break;
	}
	if (i==0) return 0;

	v = malloc((i+1)*sizeof(char));
	if (!v) return 0;
	memcpy(v,s,i);
	v[i] = 0;
	*l = i; return v;
}

/*
	import one term of polynomial

	Parses a single term at the start of s: an optional leading sign, then
	factors separated by '*', where each factor is a decimal integer or a
	variable with an optional ^exponent.  The term ends at a newline or at
	the '+' or '-' that starts the next term.

	*c receives the coefficient reduced into the range 0..p-1 for the
	prime p = f4_prime.  A term with no integer factor has coefficient 1,
	or p-1 when it is preceded by '-'.  *l receives the number of
	characters consumed.  The return value is the monomial built by
	f4_mon_new from the exponents.

	On a syntax error or an unknown variable the function prints a message
	and returns 0; *l is not set in that case.

	The input is expected in expanded form.  If a term repeats an integer
	or a variable, the later factor replaces the earlier one.
*/
INT getmon(char *s, INT *c, int *l)
{
	INT z[MAXVARS] = {0};
	INT a, b, e, n, p, t;
	int i, j, k;
	char *v;

	n = f4_nvars;
	p = f4_prime;
	a = 1;		/* coefficient before the sign is applied */
	t = +1;		/* a term without a leading sign is positive */
	*c = 1; i = 0;
next:	switch (s[i]) {
	case '+':	if (i > 0) goto done;
			i++; t=+1; break;
	case '-':	if (i > 0) goto done;
			i++; t=-1; break;
	case '*':	i++; break;
	case '/':	goto fail;
	case '\n':	goto done;
	default:	break;
	}

	/* coefficient? */
	b = getint(s+i,&j);
	if (j > 0) {
		a = b % p;
		i += j;
		goto next;
	}

	/* var^exponent */
	v = getvar(s+i,&j);
	if (!v) goto fail;
	i += j; e = 1;
	if (s[i] == '^') {
		i++;
		e = getint(s+i,&j);
		if (j==0) {
			/* do not leak the name on this error path */
			free(v);
			goto fail;
		}
		i += j;
	}

	/* put exponent in z */
	for (k=0; k < n; k++) {
		if (strcmp(v,vars[k])) continue;
		z[k] = e; break;
	}
	free(v);
	if (k==n) goto fail;
	goto next;

done:	*l = i;
	/* apply the sign last so that it also reaches an implicit */
	/* coefficient, and negate mod p without going negative */
	*c = (t < 0 && a != 0) ? p - a : a;
	return f4_mon_new(z);

fail:	printf("error: can't parse term\n");
	return 0;
}

/*
	import expanded polynomial

	Parses one newline terminated line of s as a sum of terms and returns
	it as a new row, sorted with f4row_sort.  Terms whose coefficient is 0
	are dropped.  *l receives the offset at which parsing stopped, which is
	the position of the newline when the whole line was read.

	Returns 0, without allocating anything, if the string ends before a
	newline is found.  If getmon rejects a term, the terms read so far are
	returned and *l is the offset of the rejected term.
*/
f4row * getpol(char *s, int *l)
{
	f4row *b;
	INT c, m;
	int i, j, k;

	/* count the number of terms, this is an upper bound */
	/* stop at the end of the string as well as at a newline */
	for (k=1, i=0; s[i] && s[i] != '\n'; i++) {
		if (s[i] == '+' || s[i] == '-') k++;
	}

	/* every polynomial must end with a newline */
	if (s[i]==0) return 0;

	b = f4row_new();
	b->len = b->fac = 0;
	b->cof = malloc(k*sizeof(INT));
	b->mon = malloc(k*sizeof(INT));
	b->ind = 0;

	for (i=k=0; s[i] != '\n'; i+=j) {
		m = getmon(s+i,&c,&j);
		if (m==0) break;
		if (c==0) continue;
		b->cof[k] = c;
		b->mon[k] = m;
		b->len =  ++k;
	}
	f4row_sort(b);

	*l = i;
	return b;
}

/*
	write polynomial to file

	Prints the terms of b to out in stored order with no newline at the
	end.  Each term is its coefficient with an explicit sign, followed by
	*name for every variable of exponent 1 and *name^e for every variable
	of exponent e >= 2.  The names come from the vars table.
*/
void putpol(f4row *b, FILE *out)
{
	INT *ex, cf, m, nv, t, v;

	nv = f4_nvars;
	for (t=0; t < b->len; t++) {
		cf = b->cof[t];
		m = b->mon[t];
		ex = EXPVEC(m);
		fprintf(out,"%+lld",(long long int)cf);
		for (v=0; v < nv; v++) {
			if (ex[v] == 1) {
				fprintf(out,"*%s",vars[v]);
			}
			else if (ex[v] >= 2) {
				fprintf(out,"*%s^%lld",vars[v],(long long int)ex[v]);
			}
			/* nothing is printed for any other exponent */
		}
	}
}

int main(int argc, char **argv)
{
	FILE *F = 0;
	char *f = 0;
	char *g = 0;
	char *s = 0;
	int i, j, k, l;
	long long int p, m, n, e, t;
	f4row *b;

	setbuf(stdout,0);
	p = n = m = e = t = 0;
	for (i=1; i < argc; i++) {
		if (!strcmp("-p", argv[i]) && i+1 < argc) {
			sscanf(argv[i+1], "%llu", &p);
			i++;
		}
		else if (!strcmp("-e", argv[i]) && i+1 < argc) {
			sscanf(argv[i+1], "%llu", &e);
			i++;
		}
		else if (!strcmp("-v", argv[i]) && i+1 < argc) {
			s = argv[i+1];
			for (j=k=0; s[j]; j++) {
				if (s[j] == '[') continue;
				if (s[j] == ',') continue;
				if (s[j] == ']') continue;
				vars[k] = getvar(s+j, &l);
				if (!vars[k]) printf("error: can not parse variables from %s\n",s+j);
				if (!vars[k]) return 0;
				j += l-1; k++;
			}
			n = k;
			i++;
		}
		else if (!strcmp("-l", argv[i]) && i+1 < argc) {
			s = argv[i+1];
			for (j=k=0; s[j]; j++) {
				if (s[j] == '[') continue;
				if (s[j] == ',') continue;
				if (s[j] == ']') continue;
				lvar[k] = getvar(s+j, &l);
				if (!lvar[k]) printf("error: can not parse variables from %s\n",s+j);
				if (!lvar[k]) return 0;
				j += l-1; k++;
			}
			m = k;
			i++;
		}		
		else {
			f = argv[i];
		}
	}

	if (m==0) {
		for (i=0; i < n; i++) lvar[i] = vars[i];
		m = n;
	}

	/* check necessary arguments */
	if (p==0) printf("error: no prime specified, use -p PRIME\n");
	if (n==0) printf("error: no variables found, use -v [x1,x2,...]\n");
	if (f==0) printf("error: need a filename for input polynomials\n");
	if (!p || !n || !f) return 0;

	/* initialize */
	f4mod_init(n,e,p);
	f4_tvars = m;
	f4_tperm = malloc(m*sizeof(INT));

	/* find each y in x */
	for (i=0; i < m; i++) {
		for (j=0; j < n; j++) {
			if (!strcmp(lvar[i],vars[j])) break;
		}
		if (j==n) {
			printf("error: lex variable not in variables\n");
			return 0;
		}
		f4_tperm[i] = j;
	}

	F = fopen(f,"r");
	if (!F) printf("error: file not found\n");
	if (!F) return 0;

	/* read input file into memory */
	k = fgetc(F); for (i=1; k != EOF; i++) k = fgetc(F);
	s = malloc(i*sizeof(char));
	if (!s) printf("error: file too large?\n");
	if (!s) return 0;

	rewind(F);
	k = fgetc(F);
	for (i=j=0; k != EOF; i++) {
		s[i] = (char)k;
		if (s[i]=='\r') i--;
		k = fgetc(F);
	}
	s[i] = 0;
	fclose(F);

	/* get polynomials */
	for (i=0; s[i]; i++) {
		b = getpol(s+i,&j);
		if (!b) printf("error: can not parse polynomial on line %lld\n", (long long int)f4_aload+1);
		if (!b) return 0;
		f4_insert(b);
		i += j;
	}
	free(s);

	fglm_mod();

	g = malloc(strlen(f)+5);
	for (i=0; f[i]; i++) g[i] = f[i];
	g[i++] = '.';
	g[i++] = 'o';
	g[i++] = 'u';
	g[i++] = 't';
	g[i++] = 0;

	F = fopen(g,"w");
	for (i=0; i < f4_lload; i++) {
		b = f4_lexgb[i];
		putpol(b,F);
		putc('\n',F);
	}
	fclose(F);

	f4mod_free();
	return 0;
}




#if 0
	/* NOTE(review): this fragment is inside #if 0 and is never compiled. */
	/* It frees the rows held in piv and nfm, the lex basis, f4_tperm and */
	/* the work buffers, then returns format(a).  It is presumably left */
	/* over from another version of the routine that owns piv, nfm, vec */
	/* and buf -- TODO confirm and either restore it there or delete it. */

	/* free pivots and encoding */
	for (i=0; i < f4_uload; i++) {
		b = piv[i]; c = nfm[i];
		if (b && !c) f4row_free(b);
		if (c) free(c->ind), free(c);
	}
	free(f4_lexgb); f4_lexgb = 0; f4_lload = f4_lsize = 0;
	free(f4_tperm); f4_tvars = 0;
	free(vec); free(buf); free(piv); free(nfm);
	f4mod_free();
	return format(a);
#endif

