/*
 * A generator program for AltiVec instruction sequences.
 *
 * Written by Holger Bettag (hobold@informatik.uni-bremen.de) and released
 * 12-May-2001 under the GNU Public License.
 *
 * See http://www.informatik.uni-bremen.de/~hobold/AltiVec.html to learn
 * more about the purpose (or lack thereof :-) of this code.
 * 
 * last change 29-Sep-2003
 *
 * fixed bugs in the printed output of splat8even, splat8odd, swap8, unpack8
 * fixed bugs in the calculation of add8us, sub8us
 *
 */

#include <stdio.h>
#include <string.h>

// integer types for the emulated vector elements; the code treats them as
// 16 bits wide (NOTE(review): the width of short is not checked here)
typedef unsigned short uint16;
typedef signed short sint16;

// forward declaration, needed because AVoperator::print() takes an AVconst*
class AVconst;

// emulated AltiVec operator: abstract interface implemented by every
// concrete operator class in this file
class AVoperator {
public:
  // virtual destructor, so that destroying an operator through an
  // AVoperator* (e.g. an OpPool entry) runs the derived destructor
  virtual ~AVoperator() {}

  // returns the number of input values needed
  virtual int numOps(void) {
    return 0;
  };
  // checks prerequisites for this operator; allows operators to restrict
  // the cases in which they may be used by conditionally returning false
  // (allows special-case application of operators that don't normally
  // generate identical short int elements)
  // the default implementation accepts every combination of inputs
  virtual bool isApplicable(uint16 val1, uint16 val2, uint16 val3) {
    return true;
  };
  // compute value from (up to) three parameters; operators that need
  // fewer inputs ignore the remaining ones
  virtual uint16 evaluate(uint16 val1, uint16 val2, uint16 val3) = 0;
  // prints C-style AltiVec expression for the table entry lookup[index]
  // into buf, returns string length
  virtual int print(char* buf, AVconst* lookup, int index) = 0;
};

// AltiVec generated constant, 16 bits wide
class AVconst {
public:
  int numinst;          // number of instructions needed to generate this value
  AVoperator* genBy;    // the AltiVec operator that generated this value
  unsigned short par1;  // the three parameters of the operation
  unsigned short par2;
  unsigned short par3;

  AVconst() {           // default constructor
    numinst = 0;
    genBy = NULL;
    par1 = 0;
    par2 = 0;
    par3 = 0;
  };
};

// base class for operators with zero parameters
class AVoperator0 : public AVoperator {
  // operand count reported to the caller: none
  virtual int numOps(void) {
    const int operands = 0;
    return operands;
  }
};

// base class for operators with one parameter
class AVoperator1 : public AVoperator {
  // operand count reported to the caller: one
  virtual int numOps(void) {
    const int operands = 1;
    return operands;
  }
};

// base class for operators with two parameters
class AVoperator2 : public AVoperator {
  // operand count reported to the caller: two
  virtual int numOps(void) {
    const int operands = 2;
    return operands;
  }
};

// base class for operators with three parameters
class AVoperator3 : public AVoperator {
  // operand count reported to the caller: three
  virtual int numOps(void) {
    const int operands = 3;
    return operands;
  }
};

// this table will be filled with info on how to generate the index value;
// it has one entry for each of the 65536 possible 16-bit values
AVconst List[65536];

// this table holds all emulated AltiVec operators (capacity: 170 pointers)
AVoperator* OpPool[170];
int OpCount = 0;           // number of active entries

// utility function for name string handling: copies the NUL-terminated
// string source (including the terminator) to dest and returns the length
// of source, not counting the terminator.
// dest must be large enough; no bounds checking is done here.
// source is const so that string literals can be passed legally.
inline int namecopy(char* dest, const char* source) {
  strcpy(dest, source);
  return (int)strlen(source);
}

// special parameterless operators: splat immediate byte and short
// parameterless operator modelling vec_splat_u8(0)
// (derives from AVoperator0 like all other parameterless operators)
class splatb00 : public AVoperator0 {
  // inputs are ignored; both bytes of the result hold 0x00
  virtual uint16 evaluate(uint16, uint16, uint16) {
    return 0x0000;
  }

  // copies the intrinsic call text into buf, returns its length
  virtual int print(char* buf, AVconst* lookup, int index) {
    return namecopy(buf, "vec_splat_u8(0)");
  }
};

// parameterless operator modelling vec_splat_u8(1)
class splatb01 : public AVoperator0 {
  // inputs are ignored; both bytes of the result hold 0x01
  virtual uint16 evaluate(uint16, uint16, uint16) {
    const uint16 pattern = 0x0101;
    return pattern;
  }

  // copies the intrinsic call text into buf, returns its length
  virtual int print(char* buf, AVconst* lookup, int index) {
    const int written = namecopy(buf, "vec_splat_u8(1)");
    return written;
  }
};

// parameterless operator modelling vec_splat_u8(2)
class splatb02 : public AVoperator0 {
  // inputs are ignored; both bytes of the result hold 0x02
  virtual uint16 evaluate(uint16, uint16, uint16) {
    const uint16 pattern = 0x0202;
    return pattern;
  }

  // copies the intrinsic call text into buf, returns its length
  virtual int print(char* buf, AVconst* lookup, int index) {
    const int written = namecopy(buf, "vec_splat_u8(2)");
    return written;
  }
};

// parameterless operator modelling vec_splat_u8(3)
class splatb03 : public AVoperator0 {
  // inputs are ignored; both bytes of the result hold 0x03
  virtual uint16 evaluate(uint16, uint16, uint16) {
    const uint16 pattern = 0x0303;
    return pattern;
  }

  // copies the intrinsic call text into buf, returns its length
  virtual int print(char* buf, AVconst* lookup, int index) {
    const int written = namecopy(buf, "vec_splat_u8(3)");
    return written;
  }
};

// parameterless operator modelling vec_splat_u8(4)
class splatb04 : public AVoperator0 {
  // inputs are ignored; both bytes of the result hold 0x04
  virtual uint16 evaluate(uint16, uint16, uint16) {
    const uint16 pattern = 0x0404;
    return pattern;
  }

  // copies the intrinsic call text into buf, returns its length
  virtual int print(char* buf, AVconst* lookup, int index) {
    const int written = namecopy(buf, "vec_splat_u8(4)");
    return written;
  }
};

// parameterless operator modelling vec_splat_u8(5)
class splatb05 : public AVoperator0 {
  // inputs are ignored; both bytes of the result hold 0x05
  virtual uint16 evaluate(uint16, uint16, uint16) {
    const uint16 pattern = 0x0505;
    return pattern;
  }

  // copies the intrinsic call text into buf, returns its length
  virtual int print(char* buf, AVconst* lookup, int index) {
    const int written = namecopy(buf, "vec_splat_u8(5)");
    return written;
  }
};

// parameterless operator modelling vec_splat_u8(6)
class splatb06 : public AVoperator0 {
  // inputs are ignored; both bytes of the result hold 0x06
  virtual uint16 evaluate(uint16, uint16, uint16) {
    const uint16 pattern = 0x0606;
    return pattern;
  }

  // copies the intrinsic call text into buf, returns its length
  virtual int print(char* buf, AVconst* lookup, int index) {
    const int written = namecopy(buf, "vec_splat_u8(6)");
    return written;
  }
};

// parameterless operator modelling vec_splat_u8(7)
class splatb07 : public AVoperator0 {
  // inputs are ignored; both bytes of the result hold 0x07
  virtual uint16 evaluate(uint16, uint16, uint16) {
    const uint16 pattern = 0x0707;
    return pattern;
  }

  // copies the intrinsic call text into buf, returns its length
  virtual int print(char* buf, AVconst* lookup, int index) {
    const int written = namecopy(buf, "vec_splat_u8(7)");
    return written;
  }
};

// parameterless operator modelling vec_splat_u8(8)
class splatb08 : public AVoperator0 {
  // inputs are ignored; both bytes of the result hold 0x08
  virtual uint16 evaluate(uint16, uint16, uint16) {
    const uint16 pattern = 0x0808;
    return pattern;
  }

  // copies the intrinsic call text into buf, returns its length
  virtual int print(char* buf, AVconst* lookup, int index) {
    const int written = namecopy(buf, "vec_splat_u8(8)");
    return written;
  }
};

// parameterless operator modelling vec_splat_u8(9)
class splatb09 : public AVoperator0 {
  // inputs are ignored; both bytes of the result hold 0x09
  virtual uint16 evaluate(uint16, uint16, uint16) {
    const uint16 pattern = 0x0909;
    return pattern;
  }

  // copies the intrinsic call text into buf, returns its length
  virtual int print(char* buf, AVconst* lookup, int index) {
    const int written = namecopy(buf, "vec_splat_u8(9)");
    return written;
  }
};

// parameterless operator modelling vec_splat_u8(10)
class splatb0A : public AVoperator0 {
  // inputs are ignored; both bytes of the result hold 0x0A
  virtual uint16 evaluate(uint16, uint16, uint16) {
    const uint16 pattern = 0x0A0A;
    return pattern;
  }

  // copies the intrinsic call text into buf, returns its length
  virtual int print(char* buf, AVconst* lookup, int index) {
    const int written = namecopy(buf, "vec_splat_u8(10)");
    return written;
  }
};

// parameterless operator modelling vec_splat_u8(11)
class splatb0B : public AVoperator0 {
  // inputs are ignored; both bytes of the result hold 0x0B
  virtual uint16 evaluate(uint16, uint16, uint16) {
    const uint16 pattern = 0x0B0B;
    return pattern;
  }

  // copies the intrinsic call text into buf, returns its length
  virtual int print(char* buf, AVconst* lookup, int index) {
    const int written = namecopy(buf, "vec_splat_u8(11)");
    return written;
  }
};

// parameterless operator modelling vec_splat_u8(12)
class splatb0C : public AVoperator0 {
  // inputs are ignored; both bytes of the result hold 0x0C
  virtual uint16 evaluate(uint16, uint16, uint16) {
    const uint16 pattern = 0x0C0C;
    return pattern;
  }

  // copies the intrinsic call text into buf, returns its length
  virtual int print(char* buf, AVconst* lookup, int index) {
    const int written = namecopy(buf, "vec_splat_u8(12)");
    return written;
  }
};

// parameterless operator modelling vec_splat_u8(13)
class splatb0D : public AVoperator0 {
  // inputs are ignored; both bytes of the result hold 0x0D
  virtual uint16 evaluate(uint16, uint16, uint16) {
    const uint16 pattern = 0x0D0D;
    return pattern;
  }

  // copies the intrinsic call text into buf, returns its length
  virtual int print(char* buf, AVconst* lookup, int index) {
    const int written = namecopy(buf, "vec_splat_u8(13)");
    return written;
  }
};

// parameterless operator modelling vec_splat_u8(14)
class splatb0E : public AVoperator0 {
  // inputs are ignored; both bytes of the result hold 0x0E
  virtual uint16 evaluate(uint16, uint16, uint16) {
    const uint16 pattern = 0x0E0E;
    return pattern;
  }

  // copies the intrinsic call text into buf, returns its length
  virtual int print(char* buf, AVconst* lookup, int index) {
    const int written = namecopy(buf, "vec_splat_u8(14)");
    return written;
  }
};

// parameterless operator modelling vec_splat_u8(15)
class splatb0F : public AVoperator0 {
  // inputs are ignored; both bytes of the result hold 0x0F
  virtual uint16 evaluate(uint16, uint16, uint16) {
    const uint16 pattern = 0x0F0F;
    return pattern;
  }

  // copies the intrinsic call text into buf, returns its length
  virtual int print(char* buf, AVconst* lookup, int index) {
    const int written = namecopy(buf, "vec_splat_u8(15)");
    return written;
  }
};

// parameterless operator modelling vec_splat_u8(-1)
class splatbFF : public AVoperator0 {
  // inputs are ignored; both bytes of the result hold 0xFF
  virtual uint16 evaluate(uint16, uint16, uint16) {
    const uint16 pattern = 0xFFFF;
    return pattern;
  }

  // copies the intrinsic call text into buf, returns its length
  virtual int print(char* buf, AVconst* lookup, int index) {
    const int written = namecopy(buf, "vec_splat_u8(-1)");
    return written;
  }
};

// parameterless operator modelling vec_splat_u8(-2)
class splatbFE : public AVoperator0 {
  // inputs are ignored; both bytes of the result hold 0xFE
  virtual uint16 evaluate(uint16, uint16, uint16) {
    const uint16 pattern = 0xFEFE;
    return pattern;
  }

  // copies the intrinsic call text into buf, returns its length
  virtual int print(char* buf, AVconst* lookup, int index) {
    const int written = namecopy(buf, "vec_splat_u8(-2)");
    return written;
  }
};

// parameterless operator modelling vec_splat_u8(-3)
class splatbFD : public AVoperator0 {
  // inputs are ignored; both bytes of the result hold 0xFD
  virtual uint16 evaluate(uint16, uint16, uint16) {
    const uint16 pattern = 0xFDFD;
    return pattern;
  }

  // copies the intrinsic call text into buf, returns its length
  virtual int print(char* buf, AVconst* lookup, int index) {
    const int written = namecopy(buf, "vec_splat_u8(-3)");
    return written;
  }
};

// parameterless operator modelling vec_splat_u8(-4)
class splatbFC : public AVoperator0 {
  // inputs are ignored; both bytes of the result hold 0xFC
  virtual uint16 evaluate(uint16, uint16, uint16) {
    const uint16 pattern = 0xFCFC;
    return pattern;
  }

  // copies the intrinsic call text into buf, returns its length
  virtual int print(char* buf, AVconst* lookup, int index) {
    const int written = namecopy(buf, "vec_splat_u8(-4)");
    return written;
  }
};

// parameterless operator modelling vec_splat_u8(-5)
class splatbFB : public AVoperator0 {
  // inputs are ignored; both bytes of the result hold 0xFB
  virtual uint16 evaluate(uint16, uint16, uint16) {
    const uint16 pattern = 0xFBFB;
    return pattern;
  }

  // copies the intrinsic call text into buf, returns its length
  virtual int print(char* buf, AVconst* lookup, int index) {
    const int written = namecopy(buf, "vec_splat_u8(-5)");
    return written;
  }
};

// parameterless operator modelling vec_splat_u8(-6)
class splatbFA : public AVoperator0 {
  // inputs are ignored; both bytes of the result hold 0xFA
  virtual uint16 evaluate(uint16, uint16, uint16) {
    const uint16 pattern = 0xFAFA;
    return pattern;
  }

  // copies the intrinsic call text into buf, returns its length
  virtual int print(char* buf, AVconst* lookup, int index) {
    const int written = namecopy(buf, "vec_splat_u8(-6)");
    return written;
  }
};

// parameterless operator modelling vec_splat_u8(-7)
class splatbF9 : public AVoperator0 {
  // inputs are ignored; both bytes of the result hold 0xF9
  virtual uint16 evaluate(uint16, uint16, uint16) {
    const uint16 pattern = 0xF9F9;
    return pattern;
  }

  // copies the intrinsic call text into buf, returns its length
  virtual int print(char* buf, AVconst* lookup, int index) {
    const int written = namecopy(buf, "vec_splat_u8(-7)");
    return written;
  }
};

// parameterless operator modelling vec_splat_u8(-8)
class splatbF8 : public AVoperator0 {
  // inputs are ignored; both bytes of the result hold 0xF8
  virtual uint16 evaluate(uint16, uint16, uint16) {
    const uint16 pattern = 0xF8F8;
    return pattern;
  }

  // copies the intrinsic call text into buf, returns its length
  virtual int print(char* buf, AVconst* lookup, int index) {
    const int written = namecopy(buf, "vec_splat_u8(-8)");
    return written;
  }
};

// parameterless operator modelling vec_splat_u8(-9)
class splatbF7 : public AVoperator0 {
  // inputs are ignored; both bytes of the result hold 0xF7
  virtual uint16 evaluate(uint16, uint16, uint16) {
    const uint16 pattern = 0xF7F7;
    return pattern;
  }

  // copies the intrinsic call text into buf, returns its length
  virtual int print(char* buf, AVconst* lookup, int index) {
    const int written = namecopy(buf, "vec_splat_u8(-9)");
    return written;
  }
};

// parameterless operator modelling vec_splat_u8(-10)
class splatbF6 : public AVoperator0 {
  // inputs are ignored; both bytes of the result hold 0xF6
  virtual uint16 evaluate(uint16, uint16, uint16) {
    const uint16 pattern = 0xF6F6;
    return pattern;
  }

  // copies the intrinsic call text into buf, returns its length
  virtual int print(char* buf, AVconst* lookup, int index) {
    const int written = namecopy(buf, "vec_splat_u8(-10)");
    return written;
  }
};

// parameterless operator modelling vec_splat_u8(-11)
class splatbF5 : public AVoperator0 {
  // inputs are ignored; both bytes of the result hold 0xF5
  virtual uint16 evaluate(uint16, uint16, uint16) {
    const uint16 pattern = 0xF5F5;
    return pattern;
  }

  // copies the intrinsic call text into buf, returns its length
  virtual int print(char* buf, AVconst* lookup, int index) {
    const int written = namecopy(buf, "vec_splat_u8(-11)");
    return written;
  }
};

// parameterless operator modelling vec_splat_u8(-12)
class splatbF4 : public AVoperator0 {
  // inputs are ignored; both bytes of the result hold 0xF4
  virtual uint16 evaluate(uint16, uint16, uint16) {
    const uint16 pattern = 0xF4F4;
    return pattern;
  }

  // copies the intrinsic call text into buf, returns its length
  virtual int print(char* buf, AVconst* lookup, int index) {
    const int written = namecopy(buf, "vec_splat_u8(-12)");
    return written;
  }
};

// parameterless operator modelling vec_splat_u8(-13)
class splatbF3 : public AVoperator0 {
  // inputs are ignored; both bytes of the result hold 0xF3
  virtual uint16 evaluate(uint16, uint16, uint16) {
    const uint16 pattern = 0xF3F3;
    return pattern;
  }

  // copies the intrinsic call text into buf, returns its length
  virtual int print(char* buf, AVconst* lookup, int index) {
    const int written = namecopy(buf, "vec_splat_u8(-13)");
    return written;
  }
};

// parameterless operator modelling vec_splat_u8(-14)
class splatbF2 : public AVoperator0 {
  // inputs are ignored; both bytes of the result hold 0xF2
  virtual uint16 evaluate(uint16, uint16, uint16) {
    const uint16 pattern = 0xF2F2;
    return pattern;
  }

  // copies the intrinsic call text into buf, returns its length
  virtual int print(char* buf, AVconst* lookup, int index) {
    const int written = namecopy(buf, "vec_splat_u8(-14)");
    return written;
  }
};

// parameterless operator modelling vec_splat_u8(-15)
class splatbF1 : public AVoperator0 {
  // inputs are ignored; both bytes of the result hold 0xF1
  virtual uint16 evaluate(uint16, uint16, uint16) {
    const uint16 pattern = 0xF1F1;
    return pattern;
  }

  // copies the intrinsic call text into buf, returns its length
  virtual int print(char* buf, AVconst* lookup, int index) {
    const int written = namecopy(buf, "vec_splat_u8(-15)");
    return written;
  }
};

// parameterless operator modelling vec_splat_u8(-16)
class splatbF0 : public AVoperator0 {
  // inputs are ignored; both bytes of the result hold 0xF0
  virtual uint16 evaluate(uint16, uint16, uint16) {
    const uint16 pattern = 0xF0F0;
    return pattern;
  }

  // copies the intrinsic call text into buf, returns its length
  virtual int print(char* buf, AVconst* lookup, int index) {
    const int written = namecopy(buf, "vec_splat_u8(-16)");
    return written;
  }
};

// parameterless operator modelling vec_splat_u16(1)
class splats01 : public AVoperator0 {
  // inputs are ignored; the 16-bit result is 0x0001
  virtual uint16 evaluate(uint16, uint16, uint16) {
    const uint16 pattern = 0x0001;
    return pattern;
  }

  // copies the intrinsic call text into buf, returns its length
  virtual int print(char* buf, AVconst* lookup, int index) {
    const int written = namecopy(buf, "vec_splat_u16(1)");
    return written;
  }
};

// parameterless operator modelling vec_splat_u16(2)
class splats02 : public AVoperator0 {
  // inputs are ignored; the 16-bit result is 0x0002
  virtual uint16 evaluate(uint16, uint16, uint16) {
    const uint16 pattern = 0x0002;
    return pattern;
  }

  // copies the intrinsic call text into buf, returns its length
  virtual int print(char* buf, AVconst* lookup, int index) {
    const int written = namecopy(buf, "vec_splat_u16(2)");
    return written;
  }
};

// parameterless operator modelling vec_splat_u16(3)
class splats03 : public AVoperator0 {
  // inputs are ignored; the 16-bit result is 0x0003
  virtual uint16 evaluate(uint16, uint16, uint16) {
    const uint16 pattern = 0x0003;
    return pattern;
  }

  // copies the intrinsic call text into buf, returns its length
  virtual int print(char* buf, AVconst* lookup, int index) {
    const int written = namecopy(buf, "vec_splat_u16(3)");
    return written;
  }
};

// parameterless operator modelling vec_splat_u16(4)
class splats04 : public AVoperator0 {
  // inputs are ignored; the 16-bit result is 0x0004
  virtual uint16 evaluate(uint16, uint16, uint16) {
    const uint16 pattern = 0x0004;
    return pattern;
  }

  // copies the intrinsic call text into buf, returns its length
  virtual int print(char* buf, AVconst* lookup, int index) {
    const int written = namecopy(buf, "vec_splat_u16(4)");
    return written;
  }
};

// parameterless operator modelling vec_splat_u16(5)
class splats05 : public AVoperator0 {
  // inputs are ignored; the 16-bit result is 0x0005
  virtual uint16 evaluate(uint16, uint16, uint16) {
    const uint16 pattern = 0x0005;
    return pattern;
  }

  // copies the intrinsic call text into buf, returns its length
  virtual int print(char* buf, AVconst* lookup, int index) {
    const int written = namecopy(buf, "vec_splat_u16(5)");
    return written;
  }
};

// parameterless operator modelling vec_splat_u16(6)
class splats06 : public AVoperator0 {
  // inputs are ignored; the 16-bit result is 0x0006
  virtual uint16 evaluate(uint16, uint16, uint16) {
    const uint16 pattern = 0x0006;
    return pattern;
  }

  // copies the intrinsic call text into buf, returns its length
  virtual int print(char* buf, AVconst* lookup, int index) {
    const int written = namecopy(buf, "vec_splat_u16(6)");
    return written;
  }
};

// parameterless operator modelling vec_splat_u16(7)
class splats07 : public AVoperator0 {
  // inputs are ignored; the 16-bit result is 0x0007
  virtual uint16 evaluate(uint16, uint16, uint16) {
    const uint16 pattern = 0x0007;
    return pattern;
  }

  // copies the intrinsic call text into buf, returns its length
  virtual int print(char* buf, AVconst* lookup, int index) {
    const int written = namecopy(buf, "vec_splat_u16(7)");
    return written;
  }
};

// parameterless operator modelling vec_splat_u16(8)
class splats08 : public AVoperator0 {
  // inputs are ignored; the 16-bit result is 0x0008
  virtual uint16 evaluate(uint16, uint16, uint16) {
    const uint16 pattern = 0x0008;
    return pattern;
  }

  // copies the intrinsic call text into buf, returns its length
  virtual int print(char* buf, AVconst* lookup, int index) {
    const int written = namecopy(buf, "vec_splat_u16(8)");
    return written;
  }
};

// parameterless operator modelling vec_splat_u16(9)
class splats09 : public AVoperator0 {
  // inputs are ignored; the 16-bit result is 0x0009
  virtual uint16 evaluate(uint16, uint16, uint16) {
    const uint16 pattern = 0x0009;
    return pattern;
  }

  // copies the intrinsic call text into buf, returns its length
  virtual int print(char* buf, AVconst* lookup, int index) {
    const int written = namecopy(buf, "vec_splat_u16(9)");
    return written;
  }
};

// parameterless operator modelling vec_splat_u16(10)
class splats0A : public AVoperator0 {
  // inputs are ignored; the 16-bit result is 0x000A
  virtual uint16 evaluate(uint16, uint16, uint16) {
    const uint16 pattern = 0x000A;
    return pattern;
  }

  // copies the intrinsic call text into buf, returns its length
  virtual int print(char* buf, AVconst* lookup, int index) {
    const int written = namecopy(buf, "vec_splat_u16(10)");
    return written;
  }
};

// parameterless operator modelling vec_splat_u16(11)
class splats0B : public AVoperator0 {
  // inputs are ignored; the 16-bit result is 0x000B
  virtual uint16 evaluate(uint16, uint16, uint16) {
    const uint16 pattern = 0x000B;
    return pattern;
  }

  // copies the intrinsic call text into buf, returns its length
  virtual int print(char* buf, AVconst* lookup, int index) {
    const int written = namecopy(buf, "vec_splat_u16(11)");
    return written;
  }
};

// parameterless operator modelling vec_splat_u16(12)
class splats0C : public AVoperator0 {
  // inputs are ignored; the 16-bit result is 0x000C
  virtual uint16 evaluate(uint16, uint16, uint16) {
    const uint16 pattern = 0x000C;
    return pattern;
  }

  // copies the intrinsic call text into buf, returns its length
  virtual int print(char* buf, AVconst* lookup, int index) {
    const int written = namecopy(buf, "vec_splat_u16(12)");
    return written;
  }
};

// parameterless operator modelling vec_splat_u16(13)
class splats0D : public AVoperator0 {
  // inputs are ignored; the 16-bit result is 0x000D
  virtual uint16 evaluate(uint16, uint16, uint16) {
    const uint16 pattern = 0x000D;
    return pattern;
  }

  // copies the intrinsic call text into buf, returns its length
  virtual int print(char* buf, AVconst* lookup, int index) {
    const int written = namecopy(buf, "vec_splat_u16(13)");
    return written;
  }
};

// parameterless operator modelling vec_splat_u16(14)
class splats0E : public AVoperator0 {
  // inputs are ignored; the 16-bit result is 0x000E
  virtual uint16 evaluate(uint16, uint16, uint16) {
    const uint16 pattern = 0x000E;
    return pattern;
  }

  // copies the intrinsic call text into buf, returns its length
  virtual int print(char* buf, AVconst* lookup, int index) {
    const int written = namecopy(buf, "vec_splat_u16(14)");
    return written;
  }
};

// parameterless operator modelling vec_splat_u16(15)
class splats0F : public AVoperator0 {
  // inputs are ignored; the 16-bit result is 0x000F
  virtual uint16 evaluate(uint16, uint16, uint16) {
    const uint16 pattern = 0x000F;
    return pattern;
  }

  // copies the intrinsic call text into buf, returns its length
  virtual int print(char* buf, AVconst* lookup, int index) {
    const int written = namecopy(buf, "vec_splat_u16(15)");
    return written;
  }
};

// parameterless operator modelling vec_splat_u16(-2)
class splatsFE : public AVoperator0 {
  // inputs are ignored; the 16-bit result is 0xFFFE
  virtual uint16 evaluate(uint16, uint16, uint16) {
    const uint16 pattern = 0xFFFE;
    return pattern;
  }

  // copies the intrinsic call text into buf, returns its length
  virtual int print(char* buf, AVconst* lookup, int index) {
    const int written = namecopy(buf, "vec_splat_u16(-2)");
    return written;
  }
};

// parameterless operator modelling vec_splat_u16(-3)
class splatsFD : public AVoperator0 {
  // inputs are ignored; the 16-bit result is 0xFFFD
  virtual uint16 evaluate(uint16, uint16, uint16) {
    const uint16 pattern = 0xFFFD;
    return pattern;
  }

  // copies the intrinsic call text into buf, returns its length
  virtual int print(char* buf, AVconst* lookup, int index) {
    const int written = namecopy(buf, "vec_splat_u16(-3)");
    return written;
  }
};

// parameterless operator modelling vec_splat_u16(-4)
class splatsFC : public AVoperator0 {
  // inputs are ignored; the 16-bit result is 0xFFFC
  virtual uint16 evaluate(uint16, uint16, uint16) {
    const uint16 pattern = 0xFFFC;
    return pattern;
  }

  // copies the intrinsic call text into buf, returns its length
  virtual int print(char* buf, AVconst* lookup, int index) {
    const int written = namecopy(buf, "vec_splat_u16(-4)");
    return written;
  }
};

// parameterless operator modelling vec_splat_u16(-5)
class splatsFB : public AVoperator0 {
  // inputs are ignored; the 16-bit result is 0xFFFB
  virtual uint16 evaluate(uint16, uint16, uint16) {
    const uint16 pattern = 0xFFFB;
    return pattern;
  }

  // copies the intrinsic call text into buf, returns its length
  virtual int print(char* buf, AVconst* lookup, int index) {
    const int written = namecopy(buf, "vec_splat_u16(-5)");
    return written;
  }
};

// parameterless operator modelling vec_splat_u16(-6)
class splatsFA : public AVoperator0 {
  // inputs are ignored; the 16-bit result is 0xFFFA
  virtual uint16 evaluate(uint16, uint16, uint16) {
    const uint16 pattern = 0xFFFA;
    return pattern;
  }

  // copies the intrinsic call text into buf, returns its length
  virtual int print(char* buf, AVconst* lookup, int index) {
    const int written = namecopy(buf, "vec_splat_u16(-6)");
    return written;
  }
};

// parameterless operator modelling vec_splat_u16(-7)
class splatsF9 : public AVoperator0 {
  // inputs are ignored; the 16-bit result is 0xFFF9
  virtual uint16 evaluate(uint16, uint16, uint16) {
    const uint16 pattern = 0xFFF9;
    return pattern;
  }

  // copies the intrinsic call text into buf, returns its length
  virtual int print(char* buf, AVconst* lookup, int index) {
    const int written = namecopy(buf, "vec_splat_u16(-7)");
    return written;
  }
};

// parameterless operator modelling vec_splat_u16(-8)
class splatsF8 : public AVoperator0 {
  // inputs are ignored; the 16-bit result is 0xFFF8
  virtual uint16 evaluate(uint16, uint16, uint16) {
    const uint16 pattern = 0xFFF8;
    return pattern;
  }

  // copies the intrinsic call text into buf, returns its length
  virtual int print(char* buf, AVconst* lookup, int index) {
    const int written = namecopy(buf, "vec_splat_u16(-8)");
    return written;
  }
};

// parameterless operator modelling vec_splat_u16(-9)
class splatsF7 : public AVoperator0 {
  // inputs are ignored; the 16-bit result is 0xFFF7
  virtual uint16 evaluate(uint16, uint16, uint16) {
    const uint16 pattern = 0xFFF7;
    return pattern;
  }

  // copies the intrinsic call text into buf, returns its length
  virtual int print(char* buf, AVconst* lookup, int index) {
    const int written = namecopy(buf, "vec_splat_u16(-9)");
    return written;
  }
};

// parameterless operator modelling vec_splat_u16(-10)
class splatsF6 : public AVoperator0 {
  // inputs are ignored; the 16-bit result is 0xFFF6
  virtual uint16 evaluate(uint16, uint16, uint16) {
    const uint16 pattern = 0xFFF6;
    return pattern;
  }

  // copies the intrinsic call text into buf, returns its length
  virtual int print(char* buf, AVconst* lookup, int index) {
    const int written = namecopy(buf, "vec_splat_u16(-10)");
    return written;
  }
};

// parameterless operator modelling vec_splat_u16(-11)
class splatsF5 : public AVoperator0 {
  // inputs are ignored; the 16-bit result is 0xFFF5
  virtual uint16 evaluate(uint16, uint16, uint16) {
    const uint16 pattern = 0xFFF5;
    return pattern;
  }

  // copies the intrinsic call text into buf, returns its length
  virtual int print(char* buf, AVconst* lookup, int index) {
    const int written = namecopy(buf, "vec_splat_u16(-11)");
    return written;
  }
};

// parameterless operator modelling vec_splat_u16(-12)
class splatsF4 : public AVoperator0 {
  // inputs are ignored; the 16-bit result is 0xFFF4
  virtual uint16 evaluate(uint16, uint16, uint16) {
    const uint16 pattern = 0xFFF4;
    return pattern;
  }

  // copies the intrinsic call text into buf, returns its length
  virtual int print(char* buf, AVconst* lookup, int index) {
    const int written = namecopy(buf, "vec_splat_u16(-12)");
    return written;
  }
};

// parameterless operator modelling vec_splat_u16(-13)
class splatsF3 : public AVoperator0 {
  // inputs are ignored; the 16-bit result is 0xFFF3
  virtual uint16 evaluate(uint16, uint16, uint16) {
    const uint16 pattern = 0xFFF3;
    return pattern;
  }

  // copies the intrinsic call text into buf, returns its length
  virtual int print(char* buf, AVconst* lookup, int index) {
    const int written = namecopy(buf, "vec_splat_u16(-13)");
    return written;
  }
};

// parameterless operator modelling vec_splat_u16(-14)
class splatsF2 : public AVoperator0 {
  // inputs are ignored; the 16-bit result is 0xFFF2
  virtual uint16 evaluate(uint16, uint16, uint16) {
    const uint16 pattern = 0xFFF2;
    return pattern;
  }

  // copies the intrinsic call text into buf, returns its length
  virtual int print(char* buf, AVconst* lookup, int index) {
    const int written = namecopy(buf, "vec_splat_u16(-14)");
    return written;
  }
};

// parameterless operator modelling vec_splat_u16(-15)
class splatsF1 : public AVoperator0 {
  // inputs are ignored; the 16-bit result is 0xFFF1
  virtual uint16 evaluate(uint16, uint16, uint16) {
    const uint16 pattern = 0xFFF1;
    return pattern;
  }

  // copies the intrinsic call text into buf, returns its length
  virtual int print(char* buf, AVconst* lookup, int index) {
    const int written = namecopy(buf, "vec_splat_u16(-15)");
    return written;
  }
};

// parameterless operator modelling vec_splat_u16(-16)
class splatsF0 : public AVoperator0 {
  // inputs are ignored; the 16-bit result is 0xFFF0
  virtual uint16 evaluate(uint16, uint16, uint16) {
    const uint16 pattern = 0xFFF0;
    return pattern;
  }

  // copies the intrinsic call text into buf, returns its length
  virtual int print(char* buf, AVconst* lookup, int index) {
    const int written = namecopy(buf, "vec_splat_u16(-16)");
    return written;
  }
};

// utility functions

// prints the generating strings of 2 parameters comma separated in brackets
// returns total string length
int print2par(AVconst* lookup, char* buf, char* name,
	      uint16 par1, uint16 par2) {
  int len;

  len = sprintf(buf, "%s(", name);
  if (lookup[par1].genBy != NULL) {  
    len += lookup[par1].genBy->print(buf + len, lookup, par1);
  }
  buf[len++] = ',';
  if (lookup[par2].genBy != NULL) {  
    len += lookup[par2].genBy->print(buf + len, lookup, par2);
  }
  len += sprintf(buf + len, ")");
  return len;
}

// prints the generating strings of 3 parameters comma separated in brackets
// returns total string length
int print3par(AVconst* lookup, char* buf, char* name,
	      uint16 par1, uint16 par2, uint16 par3) {
  int len;

  len = sprintf(buf, "%s(", name);
  if (lookup[par1].genBy != NULL) {  
    len += lookup[par1].genBy->print(buf + len, lookup, par1);
  }
  buf[len++] = ',';
  if (lookup[par2].genBy != NULL) {  
    len += lookup[par2].genBy->print(buf + len, lookup, par2);
  }
  buf[len++] = ',';
  if (lookup[par3].genBy != NULL) {  
    len += lookup[par3].genBy->print(buf + len, lookup, par3);
  }
  len += sprintf(buf + len, ")");
  return len;
}


// specific operators

// boolean and
class _and : public AVoperator2 {      // "and" is reserved word
  virtual uint16 evaluate(uint16 val1, uint16 val2, uint16) {
    return val1 & val2;
  };
  
  virtual int print(char* buf, AVconst* lookup, int index) {
    return print2par(lookup, buf, "vec_vand", lookup[index].par1,
		     lookup[index].par2);
  };
};

// boolean and with complement
class andc : public AVoperator2 {
  virtual uint16 evaluate(uint16 val1, uint16 val2, uint16) {
    return (val1 & (~val2)) & 0xFFFFL;
  };
  
  virtual int print(char* buf, AVconst* lookup, int index) {
    return print2par(lookup, buf, "vec_vandc", lookup[index].par1,
		     lookup[index].par2);
  };
};

// boolean or
class _or : public AVoperator2 {       // "or" is reserved word
  virtual uint16 evaluate(uint16 val1, uint16 val2, uint16) {
    return val1 | val2;
  };
  
  virtual int print(char* buf, AVconst* lookup, int index) {
    return print2par(lookup, buf, "vec_vor", lookup[index].par1,
		     lookup[index].par2);
  };
};

// boolean complemented or
class nor : public AVoperator2 {
  virtual uint16 evaluate(uint16 val1, uint16 val2, uint16) {
    return (~(val1 | val2)) & 0xFFFFL;
  };
  
  virtual int print(char* buf, AVconst* lookup, int index) {
    return print2par(lookup, buf, "vec_vnor", lookup[index].par1,
		     lookup[index].par2);
  };
};

// boolean exclusive or
class _xor : public AVoperator2 {       // "xor" is reserved word
  virtual uint16 evaluate(uint16 val1, uint16 val2, uint16) {
    return val1 ^ val2;
  };
  
  virtual int print(char* buf, AVconst* lookup, int index) {
    return print2par(lookup, buf, "vec_vxor", lookup[index].par1,
		     lookup[index].par2);
  };
};

// splat high byte
class splat8even : public AVoperator1 {
  virtual uint16 evaluate(uint16 val1, uint16, uint16) {
    return (val1 >> 8) * 0x101;
  };
  
  virtual int print(char* buf, AVconst* lookup, int index) {
    int len;
    len = sprintf(buf, "vec_vspltb(");
    len += lookup[lookup[index].par1].genBy->print(buf + len, lookup, lookup[index].par1);
    len += sprintf(buf + len, ",0)");
    return len;
  };
};

// splat low byte
class splat8odd : public AVoperator1 {
  virtual uint16 evaluate(uint16 val1, uint16, uint16) {
    return (val1 & 0xff) * 0x101;
  };
  
  virtual int print(char* buf, AVconst* lookup, int index) {
    int len;
    len = sprintf(buf, "vec_vspltb(");
    len += lookup[lookup[index].par1].genBy->print(buf + len, lookup, lookup[index].par1);
    len += sprintf(buf + len, ",1)");
    return len;
  };
};

// swap bytes
class swap8 : public AVoperator1 {
  virtual uint16 evaluate(uint16 val1, uint16, uint16) {
    return ((val1 & 0xff) << 8) | ((val1 >> 8) & 0xff);
  };
  
  virtual int print(char* buf, AVconst* lookup, int index) {
    int len;
    len = sprintf(buf, "vec_sld(");
    len += lookup[lookup[index].par1].genBy->print(buf + len, lookup, lookup[index].par1);
    buf[len++] = ',';
    len += lookup[lookup[index].par1].genBy->print(buf + len, lookup, lookup[index].par1);
    len += sprintf(buf + len, ",1)");
    return len;
  };
};

// pack pixel
/*|       A       |       R       |       G       |       B       |
 *|3 3 2 2 2 2 2 2|2 2 2 2 1 1 1 1|1 1 1 1 1 1    |               |
 *|1 0 9 8 7 6 5 4|3 2 1 0 9 8 7 6|5 4 3 2 1 0 9 8|7 6 5 4 3 2 1 0|
 *|              *|* * * * *      |* * * * *      |* * * * *      |
 *                                                :
 *                                | |         |   :     |         |
 * how to pack pixels             |1|1 1 1 1 1|   :     |         |
 *                                |5|4 3 2 1 0|9 8:7 6 5|4 3 2 1 0|
 *                                |A|    R    |   :G    |    B    |
 */
class pkpx : public AVoperator1 {
  // packs a 32-bit pixel, formed by repeating val1 in both halfwords,
  // into the 1/5/5/5 layout shown in the diagram above
  virtual uint16 evaluate(uint16 val1, uint16, uint16) {
    // splat 16 bits into 32 bits; done in unsigned arithmetic because
    // shifting a promoted int left by 16 overflows for val1 >= 0x8000
    // (assumes unsigned int has at least 32 bits, as the original did for int)
    const unsigned int val = ((unsigned int)val1 << 16) | val1;

    // pack pixel bits
    unsigned int res = (val >> 9) & 0xFC00u;  // alpha bit & R
    res |= (val >> 6) & 0x03E0u;              // G
    res |= (val >> 3) & 0x001Fu;              // B

    return (uint16)res;
  }

  // prints "vec_vpkpx(<par1>,<par1>)": the single operand is passed twice
  virtual int print(char* buf, AVconst* lookup, int index) {
    return print2par(lookup, buf, "vec_vpkpx", lookup[index].par1,
                     lookup[index].par1);
  }
};

// add byte modulo
class add8 : public AVoperator2 {
  virtual uint16 evaluate(uint16 val1, uint16 val2, uint16) {
    return (((val1 & 0xFF00L) + (val2 & 0xFF00L)) & 0xFF00L) |
      (((val1 & 0xFFL) + (val2 & 0xFFL)) & 0xFFL);
  };
  
  virtual int print(char* buf, AVconst* lookup, int index) {
    return print2par(lookup, buf, "vec_vaddubm", lookup[index].par1,
		     lookup[index].par2);
  };
};

// add byte unsigned saturation
class add8us : public AVoperator2 {
  // sum of two unsigned byte values, limited to 255
  // (explicit comparison instead of sign-bit shifting tricks, which relied
  // on arithmetic right shift and on left-shifting negative values)
  static int saturatedSum(int a, int b) {
    const int sum = a + b;
    return (sum > 255) ? 255 : sum;
  }

  // adds the two byte lanes independently with unsigned saturation
  virtual uint16 evaluate(uint16 val1, uint16 val2, uint16) {
    const int msb = saturatedSum(val1 >> 8, val2 >> 8);
    const int lsb = saturatedSum(val1 & 0xff, val2 & 0xff);
    return (msb << 8) | lsb;
  }

  // prints "vec_vaddubs(<par1>,<par2>)" for table entry lookup[index]
  virtual int print(char* buf, AVconst* lookup, int index) {
    return print2par(lookup, buf, "vec_vaddubs", lookup[index].par1,
                     lookup[index].par2);
  }
};

// add byte signed saturation
class add8ss : public AVoperator2 {
  // interprets the low 8 bits of bits as a two's complement byte
  static int signedLane(int bits) {
    bits &= 0xff;
    return (bits & 0x80) ? bits - 256 : bits;
  }

  // limits a lane result to the signed byte range -128..127
  // (explicit comparisons instead of sign-bit shifting tricks, which relied
  // on arithmetic right shift and on left-shifting negative values)
  static int saturate(int value) {
    if (value > 127) return 127;
    if (value < -128) return -128;
    return value;
  }

  // adds the two byte lanes independently with signed saturation
  virtual uint16 evaluate(uint16 val1, uint16 val2, uint16) {
    const int msb = saturate(signedLane(val1 >> 8) + signedLane(val2 >> 8));
    const int lsb = saturate(signedLane(val1) + signedLane(val2));

    // conversion to unsigned is modular, so negative lanes end up as their
    // two's complement byte pattern
    const unsigned int hi = (unsigned int)msb & 0xffu;
    const unsigned int lo = (unsigned int)lsb & 0xffu;
    return (uint16)((hi << 8) | lo);
  }

  // prints "vec_vaddsbs(<par1>,<par2>)" for table entry lookup[index]
  virtual int print(char* buf, AVconst* lookup, int index) {
    return print2par(lookup, buf, "vec_vaddsbs", lookup[index].par1,
                     lookup[index].par2);
  }
};

// add short modulo
class add16 : public AVoperator2 {
  virtual uint16 evaluate(uint16 val1, uint16 val2, uint16) {
    return (val1 + val2) & 0xFFFFL;
  };
  
  virtual int print(char* buf, AVconst* lookup, int index) {
    return print2par(lookup, buf, "vec_vadduhm", lookup[index].par1,
		     lookup[index].par2);
  };
};

// subtract byte modulo
class sub8 : public AVoperator2 {
  virtual uint16 evaluate(uint16 val1, uint16 val2, uint16) {
    return (((val1 & 0xFF00L) - (val2 & 0xFF00L)) & 0xFF00L) |
      (((val1 & 0xFFL) - (val2 & 0xFFL)) & 0xFFL);
  };
  
  virtual int print(char* buf, AVconst* lookup, int index) {
    return print2par(lookup, buf, "vec_vsububm", lookup[index].par1,
		     lookup[index].par2);
  };
};

// subtract byte unsigned saturation
class sub8us : public AVoperator2 {
  // difference of two unsigned byte values, limited to zero from below
  // (explicit comparison instead of sign-bit shifting tricks, which relied
  // on arithmetic right shift of negative values)
  static int saturatedDiff(int a, int b) {
    const int diff = a - b;
    return (diff < 0) ? 0 : diff;
  }

  // subtracts the two byte lanes independently with unsigned saturation
  virtual uint16 evaluate(uint16 val1, uint16 val2, uint16) {
    const int msb = saturatedDiff(val1 >> 8, val2 >> 8);
    const int lsb = saturatedDiff(val1 & 0xff, val2 & 0xff);
    return (msb << 8) | lsb;
  }

  // prints "vec_vsububs(<par1>,<par2>)" for table entry lookup[index]
  virtual int print(char* buf, AVconst* lookup, int index) {
    return print2par(lookup, buf, "vec_vsububs", lookup[index].par1,
                     lookup[index].par2);
  }
};

// subtract byte signed saturation
class sub8ss : public AVoperator2 {
  // interprets the low 8 bits of bits as a two's complement byte
  static int signedLane(int bits) {
    bits &= 0xff;
    return (bits & 0x80) ? bits - 256 : bits;
  }

  // limits a lane result to the signed byte range -128..127
  // (explicit comparisons instead of sign-bit shifting tricks, which relied
  // on arithmetic right shift and on left-shifting negative values)
  static int saturate(int value) {
    if (value > 127) return 127;
    if (value < -128) return -128;
    return value;
  }

  // subtracts the two byte lanes independently with signed saturation
  virtual uint16 evaluate(uint16 val1, uint16 val2, uint16) {
    const int msb = saturate(signedLane(val1 >> 8) - signedLane(val2 >> 8));
    const int lsb = saturate(signedLane(val1) - signedLane(val2));

    // conversion to unsigned is modular, so negative lanes end up as their
    // two's complement byte pattern
    const unsigned int hi = (unsigned int)msb & 0xffu;
    const unsigned int lo = (unsigned int)lsb & 0xffu;
    return (uint16)((hi << 8) | lo);
  }

  // prints "vec_vsubsbs(<par1>,<par2>)" for table entry lookup[index]
  virtual int print(char* buf, AVconst* lookup, int index) {
    return print2par(lookup, buf, "vec_vsubsbs", lookup[index].par1,
                     lookup[index].par2);
  }
};

// subtract short modulo
class sub16 : public AVoperator2 {
  virtual uint16 evaluate(uint16 val1, uint16 val2, uint16) {
    return (val1 - val2) & 0xFFFFL;
  };
  
  virtual int print(char* buf, AVconst* lookup, int index) {
    return print2par(lookup, buf, "vec_vsubuhm", lookup[index].par1,
		     lookup[index].par2);
  };
};

// vector select
class sel : public AVoperator3 {
  virtual uint16 evaluate(uint16 val1, uint16 val2, uint16 val3) {
    return (val3 & val2) | ((~val3) & val1);
  };
  
  virtual int print(char* buf, AVconst* lookup, int index) {
    return print3par(lookup, buf, "vec_vsel", lookup[index].par1,
		     lookup[index].par2, lookup[index].par3);
  };
};

// merge bytes (only applicable if result is vector of short int)
class merge8 : public AVoperator2 {
  // we have to verify that the result is a vector of short ints
  virtual bool isApplicable(uint16 val1, uint16 val2, uint16) {
    val1 = (val1 >> 8) ^ val1;  // bitwise compare of hi and lo byte
    val2 = (val2 >> 8) ^ val2;

    if ((val1 | val2) & 0xFF)   // do hi and lo bytes differ?
      return false;

    return true;                // val1 and val2 are actually byte constants
  };

  virtual uint16 evaluate(uint16 val1, uint16 val2, uint16) {
    return (val1 & 0xFF00L) + (val2 & 0xFF);
  };
  
  virtual int print(char* buf, AVconst* lookup, int index) {
    return print2par(lookup, buf, "vec_vmrghb", lookup[index].par1,
		     lookup[index].par2);
  };
};

// general vector permute is probably equivalent to merge in this special case
class vperm : public AVoperator3 {
  virtual uint16 evaluate(uint16 val1, uint16 val2, uint16 val3) {
    uint16 res;
    if (((val3 >> 8) & 16) != 0) { // pick hi byte from val2?
      res = val2 << ((val3 >> 5) & 8);  // pick even or odd byte
    } else {                // no, pick hi byte from val1 instead
      res = val1 << ((val3 >> 5) & 8);  // pick even or odd byte
    }
    res &= 0xFF00L;  // isolate high byte

    if ((val3 & 16) != 0) { // pick lo byte from val2?
      res |= (val2 >> ((~val3 << 3) & 8)) & 0xFF;  // pick even or odd byte
    } else {                // no, pick lo byte from val1 instead
      res |= (val1 >> ((~val3 << 3) & 8)) & 0xFF;  // pick even or odd byte
    }
    return res;
  };
  
  virtual int print(char* buf, AVconst* lookup, int index) {
    return print3par(lookup, buf, "vec_vperm", lookup[index].par1,
		     lookup[index].par2, lookup[index].par3);
  };
};

// average signed byte
class avgs8 : public AVoperator2 {
  virtual uint16 evaluate(uint16 val1, uint16 val2, uint16) {
    uint16 res;
    // calculate result lo byte
    res = ((short)(((signed char)val1) + ((signed char)val2) + 1)) >> 1;
    res &= 0xFF;

    val1 = ((short)val1) >> 8;
    val2 = ((short)val2) >> 8;
    
    // result hi byte
    res |= ((((signed char)val1) + ((signed char)val2) + 1) << 7) & 0xFF00L;

    return res;
  };
  
  virtual int print(char* buf, AVconst* lookup, int index) {
    return print2par(lookup, buf, "vec_vavgsb", lookup[index].par1,
		     lookup[index].par2);
  };
};

// average signed short
class avgs16 : public AVoperator2 {
  virtual uint16 evaluate(uint16 val1, uint16 val2, uint16) {
    return (int)(((short)val1) + ((short)val2) + 1) >> 1;
  };
  
  virtual int print(char* buf, AVconst* lookup, int index) {
    return print2par(lookup, buf, "vec_vavgsh", lookup[index].par1,
		     lookup[index].par2);
  };
};

// average unsigned byte
class avgu8 : public AVoperator2 {
  virtual uint16 evaluate(uint16 val1, uint16 val2, uint16) {
    uint16 res;
    // calculate result lo byte
    res = (((unsigned char)val1) + ((unsigned char)val2) + 1) >> 1;
    res &= 0xFF;

    val1 = ((short)val1) >> 8;
    val2 = ((short)val2) >> 8;
    
    // result hi byte
    res |= ((((unsigned char)val1) + ((unsigned char)val2)
	     + 1) << 7) & 0xFF00L;

    return res;
  };
  
  virtual int print(char* buf, AVconst* lookup, int index) {
    return print2par(lookup, buf, "vec_vavgub", lookup[index].par1,
		     lookup[index].par2);
  };
};

// average unsigned short
class avgu16 : public AVoperator2 {
  virtual uint16 evaluate(uint16 val1, uint16 val2, uint16) {
    return (unsigned int)(val1 + val2 + 1) >> 1;
  };
  
  virtual int print(char* buf, AVconst* lookup, int index) {
    return print2par(lookup, buf, "vec_vavguh", lookup[index].par1,
		     lookup[index].par2);
  };
};

// rotate left byte
class rol8 : public AVoperator2 {
  virtual uint16 evaluate(uint16 val1, uint16 val2, uint16) {
    int shf = val2 & 7;
    int reslo = val1 & 0xFF;

    reslo = ((reslo << shf) | (reslo >> (8 - shf))) & 0xFF;

    int reshi = (val1 >> 8) & 0xFF;
    shf = (val2 >> 8) & 7;
    reshi = ((reshi << shf) | (reshi >> (8 - shf))) & 0xFF;

    return (reshi << 8) | reslo;
  };
  
  virtual int print(char* buf, AVconst* lookup, int index) {
    return print2par(lookup, buf, "vec_vrlb", lookup[index].par1,
		     lookup[index].par2);
  };
};

// rotate left short
class rol16 : public AVoperator2 {
  virtual uint16 evaluate(uint16 val1, uint16 val2, uint16) {
    val2 &= 0x0F;
    return ((val1 << val2) | (val1 >> (16 - val2))) & 0xFFFFL;
  };
  
  virtual int print(char* buf, AVconst* lookup, int index) {
    return print2par(lookup, buf, "vec_vrlh", lookup[index].par1,
		     lookup[index].par2);
  };
};

// shift left byte
class sl8 : public AVoperator2 {
  virtual uint16 evaluate(uint16 val1, uint16 val2, uint16) {
    int reslo = (val1 << (val2 & 7)) & 0xFF;
    int reshi = ((val1 & 0xFF00) << ((val2 >> 8) & 7)) & 0xFF00L;
    return reshi | reslo;
  };
  
  virtual int print(char* buf, AVconst* lookup, int index) {
    return print2par(lookup, buf, "vec_vslb", lookup[index].par1,
		     lookup[index].par2);
  };
};

// shift left short
class sl16 : public AVoperator2 {
  virtual uint16 evaluate(uint16 val1, uint16 val2, uint16) {
    return (val1 << (val2 & 0x0F)) & 0xFFFFL;
  };
  
  virtual int print(char* buf, AVconst* lookup, int index) {
    return print2par(lookup, buf, "vec_vslh", lookup[index].par1,
		     lookup[index].par2);
  };
};

// shift right logical byte (unsigned)
class srl8 : public AVoperator2 {
  virtual uint16 evaluate(uint16 val1, uint16 val2, uint16) {
    int reslo = ((val1 & 0xFF) >> (val2 & 7)) & 0xFF;
    int reshi = (val1 >> ((val2 >> 8) & 7)) & 0xFF00L;
    return reshi | reslo;
  };
  
  virtual int print(char* buf, AVconst* lookup, int index) {
    return print2par(lookup, buf, "vec_vsrb", lookup[index].par1,
		     lookup[index].par2);
  };
};


// shift right logical short (unsigned)
class srl16 : public AVoperator2 {
  virtual uint16 evaluate(uint16 val1, uint16 val2, uint16) {
    return (val1 >> (val2 & 0x0F)) & 0xFFFFL;
  };
  
  virtual int print(char* buf, AVconst* lookup, int index) {
    return print2par(lookup, buf, "vec_vsrh", lookup[index].par1,
		     lookup[index].par2);
  };
};

// shift right algebraic byte (signed)
// Emulates vec_vsrab on one 16 bit element: each byte of val1 is shifted
// right by the low three bits of the corresponding byte of val2, replicating
// the byte's sign bit into the vacated positions.
class sra8 : public AVoperator2 {
  // Arithmetic right shift of a single byte by 'amount' (0..7) positions.
  // The sign fill is done by hand on non-negative values: moving the byte
  // to the top of an int with a left shift overflows (undefined behaviour
  // before C++20) and right-shifting a negative int is implementation
  // defined, so neither is used here.
  static int shiftByte(int byte, int amount) {
    byte &= 0xFF;
    int shifted = byte >> amount;
    if (byte & 0x80) {
      // negative byte: set the top 'amount' bits (none for amount == 0)
      shifted |= (0xFF << (8 - amount)) & 0xFF;
    }
    return shifted;
  }

  virtual uint16 evaluate(uint16 val1, uint16 val2, uint16) {
    const int lo = shiftByte(val1, val2 & 7);
    const int hi = shiftByte(val1 >> 8, (val2 >> 8) & 7);
    return static_cast<uint16>((hi << 8) | lo);
  }

  // prints the C-style expression for this table entry, returns its length
  virtual int print(char* buf, AVconst* lookup, int index) {
    return print2par(lookup, buf, "vec_vsrab", lookup[index].par1,
                     lookup[index].par2);
  }
};

// shift right algebraic short (signed)
// Emulates vec_vsrah on one 16 bit element: val1 is shifted right by the low
// four bits of val2, replicating the sign bit into the vacated positions.
class sra16 : public AVoperator2 {
  virtual uint16 evaluate(uint16 val1, uint16 val2, uint16) {
    const unsigned int amount = val2 & 0x0F;

    // Work on unsigned values and add the sign fill explicitly; this avoids
    // shifting a set bit into the sign position of an int and the
    // implementation-defined right shift of a negative value.
    unsigned int shifted = static_cast<unsigned int>(val1) >> amount;
    if (val1 & 0x8000) {
      // negative input: set the top 'amount' bits (none for amount == 0)
      shifted |= (0xFFFFu << (16 - amount)) & 0xFFFFu;
    }

    return static_cast<uint16>(shifted);
  }

  // prints the C-style expression for this table entry, returns its length
  virtual int print(char* buf, AVconst* lookup, int index) {
    return print2par(lookup, buf, "vec_vsrah", lookup[index].par1,
                     lookup[index].par2);
  }
};

// min signed byte
// Emulates vec_vminsb on one 16 bit element: the low and the high byte are
// compared independently as signed values and the smaller one is kept.
class min8s : public AVoperator2 {
  virtual uint16 evaluate(uint16 val1, uint16 val2, uint16) {
    // high byte lane, compared as signed bytes
    signed char msb = static_cast<signed char>(val1 >> 8);
    const signed char msb2 = static_cast<signed char>(val2 >> 8);
    if (msb2 < msb)
      msb = msb2;

    // low byte lane, compared as signed bytes
    signed char lsb = static_cast<signed char>(val1 & 0xff);
    const signed char lsb2 = static_cast<signed char>(val2 & 0xff);
    if (lsb2 < lsb)
      lsb = lsb2;

    // reduce each lane to its 8 bit pattern *before* shifting: left-shifting
    // a negative value is undefined behaviour prior to C++20
    return static_cast<uint16>(((msb & 0xff) << 8) | (lsb & 0xff));
  }

  // prints the C-style expression for this table entry, returns its length
  virtual int print(char* buf, AVconst* lookup, int index) {
    return print2par(lookup, buf, "vec_vminsb", lookup[index].par1,
                     lookup[index].par2);
  }
};

// min unsigned byte
class min8u : public AVoperator2 {
  virtual uint16 evaluate(uint16 val1, uint16 val2, uint16) {
    unsigned int msb = val1 >> 8;
    if (unsigned(val2 >> 8) < msb)
      msb = val2 >> 8;
    
    unsigned int lsb = val1 & 0xff;
    if (unsigned(val2 & 0xff) < lsb)
      lsb = val2 & 0xff;

    return (msb << 8) | (lsb & 0xffL);
  };
  
  virtual int print(char* buf, AVconst* lookup, int index) {
    return print2par(lookup, buf, "vec_vminub", lookup[index].par1,
		     lookup[index].par2);
  };
};

// max signed byte
// Emulates vec_vmaxsb on one 16 bit element: the low and the high byte are
// compared independently as signed values and the larger one is kept.
class max8s : public AVoperator2 {
  virtual uint16 evaluate(uint16 val1, uint16 val2, uint16) {
    // high byte lane, compared as signed bytes
    signed char msb = static_cast<signed char>(val1 >> 8);
    const signed char msb2 = static_cast<signed char>(val2 >> 8);
    if (msb2 > msb)
      msb = msb2;

    // low byte lane, compared as signed bytes
    signed char lsb = static_cast<signed char>(val1 & 0xff);
    const signed char lsb2 = static_cast<signed char>(val2 & 0xff);
    if (lsb2 > lsb)
      lsb = lsb2;

    // reduce each lane to its 8 bit pattern *before* shifting: left-shifting
    // a negative value is undefined behaviour prior to C++20
    return static_cast<uint16>(((msb & 0xff) << 8) | (lsb & 0xff));
  }

  // prints the C-style expression for this table entry, returns its length
  virtual int print(char* buf, AVconst* lookup, int index) {
    return print2par(lookup, buf, "vec_vmaxsb", lookup[index].par1,
                     lookup[index].par2);
  }
};

// max unsigned byte
class max8u : public AVoperator2 {
  virtual uint16 evaluate(uint16 val1, uint16 val2, uint16) {
    unsigned int msb = val1 >> 8;
    if (unsigned(val2 >> 8) > msb)
      msb = val2 >> 8;
    
    unsigned int lsb = val1 & 0xff;
    if (unsigned(val2 & 0xff) > lsb)
      lsb = val2 & 0xff;

    return (msb << 8) | (lsb & 0xffL);
  };
  
  virtual int print(char* buf, AVconst* lookup, int index) {
    return print2par(lookup, buf, "vec_vmaxub", lookup[index].par1,
		     lookup[index].par2);
  };
};

// pack low byte signed saturation
class pack8ss : public AVoperator1 {
  virtual uint16 evaluate(uint16 val1, uint16, uint16) {
    sint16 val = (sint16)val1;

    // find sign
    int sgn = val >> 15;

    // find magnitude
    val ^= sgn;
    
    // clamp magnitude
    val |= (127 - val) >> 15;
    val &= 0x7f;

    // restore sign
    val ^= sgn;

    val = val & 0xff;

    return (val << 8) | (val & 0xffL);
  };
  
  virtual int print(char* buf, AVconst* lookup, int index) {
    return print2par(lookup, buf, "vec_vpkshss", lookup[index].par1,
		     lookup[index].par1);
  };
};

// unpack bytes to short
class unpack8 : public AVoperator1 {

  // we have to verify that the result is a vector of short ints
  virtual bool isApplicable(uint16 val1, uint16, uint16) {
    // only works when source is a actually a byte constant
    return ((val1 >> 8) == (val1 & 0xff));
  };
  virtual uint16 evaluate(uint16 val1, uint16, uint16) {
    signed char tmp = (val1 & 0xff);
    int tmp2 = tmp;           // extend sign
    return (uint16)(tmp2 & 0xffffL);
  };
  
  virtual int print(char* buf, AVconst* lookup, int index) {
    int len;
    len = sprintf(buf, "vec_vupkhsb(");
    len += lookup[lookup[index].par1].genBy->print(buf + len, lookup, lookup[index].par1);
    len += sprintf(buf + len, ")");
    return len;
  };
};


/*
with 3 parameters:

*vperm
    hmm, replaces a splat byte, or a copy or a rotate by 8, or something else?
*vsel

with 2 parameters:
these in one size (no difference between byte and short)

*and
*andc
*or
*nor
*xor
*merge8     (with isApplicable() restricted to byte inputs)

*add8us     (byte unsigned saturation)
*add8ss     (byte signed saturation)
*sub8us     (byte unsigned saturation)
*sub8ss     (byte signed saturation)

*min8s
*min8u
*max8s
*max8u

these in byte and short

*add        (is saturation redundant? Not for Bytes!)
*sub        (is saturation redundant? Not for Bytes!)
*avgs
*avgu
rol
sl
srl
sra

with one parameter:

*swap bytes (with vsldoi)
*pkpx
*pack8ss
*splat byte even
*splat byte odd
*/



// initialization of operator classes
void initops(void)
{
  /* operations are prioritized top to bottom
   * the higher up the list, the more likely it will be picked
   *
   * the first instruction sequence of a given shortest length
   * found this way will be stored in the list
   */

  OpCount = 0;
  // with zero parameters
  // splat byte
  OpPool[OpCount++] = new splatb00;
  OpPool[OpCount++] = new splatb01;
  OpPool[OpCount++] = new splatb02;
  OpPool[OpCount++] = new splatb03;
  OpPool[OpCount++] = new splatb04;
  OpPool[OpCount++] = new splatb05;
  OpPool[OpCount++] = new splatb06;
  OpPool[OpCount++] = new splatb07;
  OpPool[OpCount++] = new splatb08;
  OpPool[OpCount++] = new splatb09;
  OpPool[OpCount++] = new splatb0A;
  OpPool[OpCount++] = new splatb0B;
  OpPool[OpCount++] = new splatb0C;
  OpPool[OpCount++] = new splatb0D;
  OpPool[OpCount++] = new splatb0E;
  OpPool[OpCount++] = new splatb0F;

  OpPool[OpCount++] = new splatbF0;
  OpPool[OpCount++] = new splatbF1;
  OpPool[OpCount++] = new splatbF2;
  OpPool[OpCount++] = new splatbF3;
  OpPool[OpCount++] = new splatbF4;
  OpPool[OpCount++] = new splatbF5;
  OpPool[OpCount++] = new splatbF6;
  OpPool[OpCount++] = new splatbF7;
  OpPool[OpCount++] = new splatbF8;
  OpPool[OpCount++] = new splatbF9;
  OpPool[OpCount++] = new splatbFA;
  OpPool[OpCount++] = new splatbFB;
  OpPool[OpCount++] = new splatbFC;
  OpPool[OpCount++] = new splatbFD;
  OpPool[OpCount++] = new splatbFE;
  OpPool[OpCount++] = new splatbFF;

  // splat short
  OpPool[OpCount++] = new splats01;
  OpPool[OpCount++] = new splats02;
  OpPool[OpCount++] = new splats03;
  OpPool[OpCount++] = new splats04;
  OpPool[OpCount++] = new splats05;
  OpPool[OpCount++] = new splats06;
  OpPool[OpCount++] = new splats07;
  OpPool[OpCount++] = new splats08;
  OpPool[OpCount++] = new splats09;
  OpPool[OpCount++] = new splats0A;
  OpPool[OpCount++] = new splats0B;
  OpPool[OpCount++] = new splats0C;
  OpPool[OpCount++] = new splats0D;
  OpPool[OpCount++] = new splats0E;
  OpPool[OpCount++] = new splats0F;

  OpPool[OpCount++] = new splatsF0;
  OpPool[OpCount++] = new splatsF1;
  OpPool[OpCount++] = new splatsF2;
  OpPool[OpCount++] = new splatsF3;
  OpPool[OpCount++] = new splatsF4;
  OpPool[OpCount++] = new splatsF5;
  OpPool[OpCount++] = new splatsF6;
  OpPool[OpCount++] = new splatsF7;
  OpPool[OpCount++] = new splatsF8;
  OpPool[OpCount++] = new splatsF9;
  OpPool[OpCount++] = new splatsFA;
  OpPool[OpCount++] = new splatsFB;
  OpPool[OpCount++] = new splatsFC;
  OpPool[OpCount++] = new splatsFD;
  OpPool[OpCount++] = new splatsFE;

  // with more than zero parameters
  // booleans
  OpPool[OpCount++] = new _and;
  OpPool[OpCount++] = new _or;
  OpPool[OpCount++] = new andc;
  OpPool[OpCount++] = new nor;
  OpPool[OpCount++] = new _xor;
  OpPool[OpCount++] = new sel;

  // simple integer arithmetic
  OpPool[OpCount++] = new add8;
  OpPool[OpCount++] = new sub8;
  OpPool[OpCount++] = new avgs8;    // 30
  OpPool[OpCount++] = new avgu8;
  OpPool[OpCount++] = new add8us;
  OpPool[OpCount++] = new add8ss;
  OpPool[OpCount++] = new sub8us;
  OpPool[OpCount++] = new sub8ss;   // 25
  OpPool[OpCount++] = new min8u;
  OpPool[OpCount++] = new min8s;
  OpPool[OpCount++] = new max8u;
  OpPool[OpCount++] = new max8s;
  OpPool[OpCount++] = new add16;
  OpPool[OpCount++] = new sub16;
  OpPool[OpCount++] = new avgs16;
  OpPool[OpCount++] = new avgu16;
  
  // rotates and shifts
  OpPool[OpCount++] = new rol8;
  OpPool[OpCount++] = new sl8;
  OpPool[OpCount++] = new srl8;
  OpPool[OpCount++] = new sra8;
  OpPool[OpCount++] = new rol16;
  OpPool[OpCount++] = new sl16;
  OpPool[OpCount++] = new srl16;
  OpPool[OpCount++] = new sra16;

  // various permutations
  OpPool[OpCount++] = new swap8;
  OpPool[OpCount++] = new splat8even;
  OpPool[OpCount++] = new splat8odd;
  OpPool[OpCount++] = new pack8ss;
  OpPool[OpCount++] = new pkpx;
  OpPool[OpCount++] = new merge8;
  OpPool[OpCount++] = new unpack8;
  OpPool[OpCount++] = new vperm;

  return;
}

// Destroys every operator registered in OpPool, last one first, and resets
// OpCount to zero.
// NOTE(review): AVoperator as declared at the top of this file has no virtual
// destructor; if OpPool holds AVoperator pointers, deleting the derived
// operators through them is formally undefined -- confirm and consider
// adding a virtual destructor to the base class.
void deleteops(void)
{
  int remaining = OpCount;

  while (remaining > 0) {
    remaining--;
    delete OpPool[remaining];
  }

  OpCount = 0;
}

// Prints one line per 16 bit value (0x0000 .. 0xffff) to stdout: the value in
// hex, the number of instructions recorded for it in List, and either the
// generating expression, "(no sequence found)" when no sequence is recorded,
// or an error marker when a sequence length is recorded without an operator.
void printtable(void)
{
  char buf[100000];   // scratch space for one printed expression

  for(int i = 0; i < 65536; i++) {
    // emit and flush the prefix first so it is visible even if printing
    // the expression goes wrong
    printf("%04x (#%d) ", i, List[i].numinst);
    fflush(stdout);
    if (List[i].numinst != 0) {
      if (List[i].genBy != NULL) {
        // the operator writes its (recursively expanded) expression to buf;
        // the returned length is not needed here
        List[i].genBy->print(buf, List, i);
      } else {
        sprintf(buf, "ERROR: NO OPERATOR");
      }
    } else {
      sprintf(buf, "(no sequence found)");
    }
    printf("%s\n", buf);
    fflush(stdout);
  }

  return;
}

// Gathers every temporary that takes part in generating up to three values,
// the values themselves included, by walking the generating operators
// recorded in the list of constants. Common subexpressions are NOT merged,
// so a value may be reported several times.
//   dest             receives the temporaries; must be large enough
//   num              how many of val1..val3 are valid (0 to 3)
//   val1, val2, val3 the values whose generating sequences are expanded
// Returns the number of entries written to dest.
int GetTemporaries(int* dest, int num, int val1, int val2, int val3)
{
  const int values[3] = { val1, val2, val3 };
  int found = 0;

  for (int k = 0; k < 3 && k < num; k++) {
    const int value = values[k];

    // the value itself comes first ...
    dest[found++] = value;

    // ... followed by everything that was needed to produce its operands
    found += GetTemporaries(dest + found, List[value].genBy->numOps(),
                            List[value].par1, List[value].par2,
                            List[value].par3);
  }

  return found;
}

// Determines how many instructions are needed to produce up to three values
// together. Temporaries shared between the values (common subexpressions)
// are counted only once. Relies on the List of constants as far as it has
// been filled in.
//   num            how many of op1..op3 are valid (0 to 3)
//   op1, op2, op3  the values to generate
// Returns the number of distinct temporaries.
int CumulativeLen(int num, int op1, int op2, int op3) {

  int temps[1000];
  const int total = GetTemporaries(temps, num, op1, op2, op3);

  // bring equal entries next to each other (insertion sort is sufficient
  // for the short lists that occur here)
  for (int k = 1; k < total; k++) {
    const int key = temps[k];
    int pos = k;
    while (pos > 0 && temps[pos - 1] > key) {
      temps[pos] = temps[pos - 1];
      pos--;
    }
    temps[pos] = key;
  }

  if (total <= 0) {
    return 0;
  }

  // the first entry is always new; afterwards every change of value in the
  // sorted list marks another distinct temporary
  int distinct = 1;
  for (int k = 1; k < total; k++) {
    if (temps[k] != temps[k - 1]) {
      distinct++;
    }
  }

  return distinct;
}

// Like GetTemporaries(), but the collected temporaries are sorted in
// ascending order and duplicates are removed.
//   list              receives the distinct temporaries; must be large enough
//                     for the unreduced list produced by GetTemporaries()
//   num               how many of val1..val3 are valid (0 to 3)
//   val1, val2, val3  the values whose generating sequences are expanded
// Returns the number of distinct temporaries left in list.
int GetTemporariesSorted(int* list, int num, int val1, int val2, int val3)
{
  const int total = GetTemporaries(list, num, val1, val2, val3);

  if (total == 0) {
    return 0;
  }

  // sort ascending (insertion sort is sufficient for the short lists that
  // occur here)
  for (int k = 1; k < total; k++) {
    const int key = list[k];
    int pos = k;
    while (pos > 0 && list[pos - 1] > key) {
      list[pos] = list[pos - 1];
      pos--;
    }
    list[pos] = key;
  }

  // compact in place: keep an entry only when it differs from its
  // predecessor in the sorted sequence
  int kept = 1;
  for (int k = 1; k < total; k++) {
    if (list[k] != list[k - 1]) {
      list[kept] = list[k];
      kept++;
    }
  }

  return kept;
}

// Program entry point. Fills the global table List with, for every 16 bit
// value, the first shortest instruction sequence found that generates it,
// then prints the table.
//
// The search proceeds by increasing sequence length: values reachable with a
// single zero-parameter operator are seeded first; afterwards, for each
// length from 2 to 6, every operator in OpPool is applied (in pool order) to
// operands whose combined sequence length is one less than the current
// length. A table entry is only written while it is still empty
// (numinst == 0), so the first hit for a value wins. Progress is reported on
// stderr, the final table on stdout.
int main(void)
{
  int SeqLen;  // current sequence length
  int CurOp;   // current operator
  int CurVal;  // current value
  int CurLen;  // temporary sequence length
  int op1, op2, op3;

  int Left = 65536;  // number of empty entries

  int list[1000];    // array of existing temporary values
  int count, i;      // number of existing temporaries

  initops();

  //printf("OpCount: %d\n", OpCount);

  // seed values that don't need parameters
  // NOTE(review): the entry is written without checking that it is still
  // empty; if two zero-parameter operators ever evaluated to the same value,
  // the later one would overwrite the earlier and Left would be decremented
  // twice -- confirm the operator set never produces such duplicates.
  for (CurOp = 0; CurOp < OpCount; CurOp++) {
    if (OpPool[CurOp]->numOps() == 0) {
      CurVal = OpPool[CurOp]->evaluate(0,0,0);
      List[CurVal].genBy = OpPool[CurOp];
      List[CurVal].numinst = 1;
      Left--;
    }
  }
  
  // grow sequences one instruction at a time, up to six instructions
  for (SeqLen = 2; SeqLen <= 6; SeqLen++){
    fprintf(stderr, "sequence length: %d (%d entries left)\n", SeqLen, Left);
    for (CurOp = 0; CurOp < OpCount; CurOp++) {
      // progress indicator, overwritten in place via '\r'
      fprintf(stderr,"  %d  (%d left)       \r",OpCount - CurOp, Left);
      switch(OpPool[CurOp]->numOps()) {
      case 0:
	// do nothing here, table is seeded above
	break;
      case 1:
	// single parameter case: just lengthen the chain
	for (op1 = 65535; op1 >= 0; op1--){
	  // only operands that take exactly SeqLen - 1 instructions qualify
	  if (List[op1].numinst == SeqLen - 1) {
	    if (OpPool[CurOp]->isApplicable(op1, 0, 0)) {
	      CurVal = OpPool[CurOp]->evaluate(op1, 0, 0);
	      if (List[CurVal].numinst == 0) {
		List[CurVal].genBy = OpPool[CurOp];
		List[CurVal].par1 = op1;
		List[CurVal].numinst = SeqLen;
		Left--;
	      }
	    }
	  }
	}
	break;
      case 2:
	// dual parameter case: combine two chains
	for (op1 = 65535; op1 >= 0; op1--) {
	  // skip values without a sequence and values that are too expensive
	  if (List[op1].numinst == 0 || List[op1].numinst >= SeqLen) {
	    continue;
	  }

	  // check if op2 has to be an existing temporary value
	  if (List[op1].numinst == (SeqLen - 1)) {
	    // op1 alone uses up the whole budget, so op2 can only be one of
	    // the temporaries produced on the way to op1 (op1 included)
	    count = GetTemporariesSorted(list, 1, op1, 0, 0);
	    for (count -= 1; count >= 0; count--) {
	      op2 = list[count];
	      
	      if (OpPool[CurOp]->isApplicable(op1, op2, 0)) {
		CurVal = OpPool[CurOp]->evaluate(op1, op2, 0);
		if (List[CurVal].numinst == 0) {
		  List[CurVal].genBy = OpPool[CurOp];
		  List[CurVal].par1 = op1;
		  List[CurVal].par2 = op2;
		  List[CurVal].numinst = SeqLen;
		  Left--;
		}
	      }
	    }
	  } else {
	    // op1 leaves room in the budget: try every known value as op2
	    for (op2 = 65535; op2 >= 0; op2--) {
	      if (List[op2].numinst == 0 || List[op2].numinst >= SeqLen) {
		continue;
	      }
	      
	      // calculate sequence length (including common subexpressions)
	      CurLen = CumulativeLen(2, op1, op2, 0);
	      
	      if (CurLen != (SeqLen - 1)) {
		continue;
	      }
	      
	      if (OpPool[CurOp]->isApplicable(op1, op2, 0)) {
		CurVal = OpPool[CurOp]->evaluate(op1, op2, 0);
		if (List[CurVal].numinst == 0) {
		  // always true at this point because of the continue above
		  if (CurLen == (SeqLen - 1)) { 
		    List[CurVal].genBy = OpPool[CurOp];
		    List[CurVal].par1 = op1;
		    List[CurVal].par2 = op2;
		    List[CurVal].numinst = SeqLen;
		    Left--;
		  }
		}
	      }
	    }
	  }
	}
	break;
      case 3:
	// triple parameter case: combine three chains
	if (SeqLen <= 5) {   // limit to early cases with few values
	                     // anything else would take far too long
	  for (op1 = 65535; op1 >= 0; op1--) {
	    if (List[op1].numinst == 0 || List[op1].numinst >= SeqLen) {
	      continue;
	    }
	    // progress indicator including the current first operand
	    fprintf(stderr,"  %d  (%d left)  %04x\r",
		    OpCount - CurOp, Left, op1);

	    // check if ops 2 and 3 have to be existing temporaries
	    if (List[op1].numinst == (SeqLen - 1)) {
	      // NOTE(review): unlike the dual parameter case this uses the
	      // unsorted GetTemporaries(), so list may contain duplicates;
	      // op3 is only taken from list[0..count], i.e. not every ordered
	      // (op2, op3) pair is tried -- confirm this is intended.
	      count = GetTemporaries(list, 1, op1, 0, 0);
	      for (count -= 1; count >= 0; count--) {
		op2 = list[count];
		for (i = 0; i <= count; i++) {
		  op3 = list[i];

		  if (OpPool[CurOp]->isApplicable(op1, op2, op3)) {
		    CurVal = OpPool[CurOp]->evaluate(op1, op2, op3);
		    if (List[CurVal].numinst == 0) {
		      List[CurVal].genBy = OpPool[CurOp];
		      List[CurVal].par1 = op1;
		      List[CurVal].par2 = op2;
		      List[CurVal].par3 = op3;
		      List[CurVal].numinst = SeqLen;
		      Left--;
		    }
		  }		  
		}
	      }
	    } else {
	      for (op2 = 65535; op2 >= 0; op2--) {
		if (List[op2].numinst == 0 || List[op2].numinst >= SeqLen) {
		  continue;
		}
		// check if ops 1 and 2 take too many instructions
		count = GetTemporariesSorted(list, 2, op1, op2, 0);
		if (count >= SeqLen) {
		  continue;
		}
		// check if op3 has to be an existing temporary
		if (count == SeqLen - 1) {
		  // op1 and op2 together use up the budget: op3 must be one
		  // of their (distinct) temporaries
		  for (count -= 1; count >= 0; count--) {
		    op3 = list[count];
		    if (OpPool[CurOp]->isApplicable(op1, op2, op3)) {
		      CurVal = OpPool[CurOp]->evaluate(op1, op2, op3);
		      if (List[CurVal].numinst == 0) {
			List[CurVal].genBy = OpPool[CurOp];
			List[CurVal].par1 = op1;
			List[CurVal].par2 = op2;
			List[CurVal].par3 = op3;
			List[CurVal].numinst = SeqLen;
			Left--;
		      }
		    }		    
		  }
		} else {
		  // budget left over: try every known value as op3
		  for (op3 = 65535; op3 >= 0; op3--) {
		    if (List[op3].numinst == 0 || List[op3].numinst >= SeqLen){
		      continue;
		    }
		    
		    // calculate sequence length (including common subexpr)
		    CurLen = CumulativeLen(3, op1, op2, op3);
		    
		    if (CurLen != (SeqLen - 1)) {
		      continue;
		    }
		    
		    if (OpPool[CurOp]->isApplicable(op1, op2, op3)) {
		      CurVal = OpPool[CurOp]->evaluate(op1, op2, op3);
		      if (List[CurVal].numinst == 0) {
			// always true at this point because of the continue above
			if (CurLen == (SeqLen - 1)) { 
			  List[CurVal].genBy = OpPool[CurOp];
			  List[CurVal].par1 = op1;
			  List[CurVal].par2 = op2;
			  List[CurVal].par3 = op3;
			  List[CurVal].numinst = SeqLen;
			  Left--;
			}
		      }
		    }
		  }
		}
	      }
	    }
	  }
	}
	break;
      default:
	fprintf(stderr, "wrong number of params for op %d\n", CurOp);
      }
      // table full, no need to look for longer sequences
      if (Left <= 0)
	break;
    }

    // table full, no need to look for longer sequences
    if (Left <= 0)
      break;
  }

  fprintf(stderr, "(%d entries left)        \n", Left);

  printtable();

  deleteops();
  return 0;
}
