// Hash compacted storage of a reachable state set -*- c++ -*-

#ifdef __GNUC__
# pragma implementation
#endif // __GNUC__
#include "CompactSet.h"
#include "ByteBuffer.h"
#include <stdlib.h>
#include <assert.h>
#include <time.h>

/** @file CompactSet.C
 * Transient, hash compacted reachability set storage
 */

/* Copyright © 2002 Marko Mäkelä (msmakela@tcs.hut.fi).

   This file is part of MARIA, a reachability analyzer and model checker
   for high-level Petri nets.

   MARIA is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   MARIA is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   The GNU General Public License is often shipped with GNU software, and
   is generally kept in a file called COPYING or LICENSE.  If you do not
   have a copy of the license, write to the Free Software Foundation,
   59 Temple Place, Suite 330, Boston, MA 02111 USA. */

/** Number of bits size_t holds.
 * The expansion uses CHAR_BIT, which is defined by <limits.h>; that header
 * is not among the includes at the top of this file.
 * NOTE(review): presumably it arrives via one of the project headers --
 * confirm before using this macro.
 */
#define SIZE_T_BIT (sizeof (size_t) * CHAR_BIT)

/** Candidate hash table sizes: primes[x] == prime closest to 32 << x,
 * listed in ascending order, four entries per row
 */
static const unsigned primes[] = {
  /* x =  0.. 3 */ 0x1f,       0x3d,       0x7f,       0x101,
  /* x =  4.. 7 */ 0x1fd,      0x3fd,      0x805,      0xffd,
  /* x =  8..11 */ 0x1fff,     0x3ffd,     0x8003,     0xfff1,
  /* x = 12..15 */ 0x1ffff,    0x3fffb,    0x7ffff,    0xffffd,
  /* x = 16..19 */ 0x1ffff7,   0x3ffffd,   0x7ffff1,   0xfffffd,
  /* x = 20..23 */ 0x2000023,  0x3fffffb,  0x800001d,  0x10000003,
  /* x = 24..27 */ 0x1ffffffd, 0x40000003, 0x7fffffff, 0xfffffffb
};

// Begin imported code

/* The following code is imported from utils/md5.c from dpkg-1.6.15.
 * This implementation of the MD5 algorithm was originally written by
 * Colin Plumb in 1993 and updated by Ian Jackson <ijackson@nyx.cs.du.edu>.
 * The original code is in the public domain.
 */

/** @name The four core functions - F1 is optimized somewhat */
/*@{*/
/* The textbook form of F1 is (x & y | ~x & z); the form below selects the
 * same bits (y where x is set, z elsewhere) with one operation less. */
#define F1(x, y, z) ((z) ^ ((x) & ((y) ^ (z))))
#define F2(x, y, z) F1(z, x, y)
#define F3(x, y, z) ((x) ^ (y) ^ (z))
#define F4(x, y, z) ((y) ^ ((x) | ~(z)))
/*@}*/

/** This is the central step in the MD5 algorithm:
 * add f(x,y,z) and the input word to w, rotate w left by s bits, add x.
 * The rotation is written for 32-bit words (it shifts right by 32-s).
 */
#define MD5STEP(f,w,x,y,z,in,s) \
	 ((w) += f(x,y,z) + (in), (w) = ((w)<<(s) | (w)>>(32-(s))) + (x))

/**
 * The core of the MD5 algorithm, this alters an existing MD5 hash to
 * reflect the addition of 16 longwords of new data.
 * @param buf	(input/output) the buffer for the hash value
 * @param in	the data to be added to the hash value
 */
static void
MD5Transform (unsigned buf[4], const unsigned in[16])
{
  // working copies of the four state words; no `register' specifier,
  // which is no longer valid as of C++17
  unsigned a, b, c, d;

  a = buf[0];
  b = buf[1];
  c = buf[2];
  d = buf[3];

  // round 1: F1, input words in natural order
  MD5STEP(F1, a, b, c, d, in[0] + 0xd76aa478, 7);
  MD5STEP(F1, d, a, b, c, in[1] + 0xe8c7b756, 12);
  MD5STEP(F1, c, d, a, b, in[2] + 0x242070db, 17);
  MD5STEP(F1, b, c, d, a, in[3] + 0xc1bdceee, 22);
  MD5STEP(F1, a, b, c, d, in[4] + 0xf57c0faf, 7);
  MD5STEP(F1, d, a, b, c, in[5] + 0x4787c62a, 12);
  MD5STEP(F1, c, d, a, b, in[6] + 0xa8304613, 17);
  MD5STEP(F1, b, c, d, a, in[7] + 0xfd469501, 22);
  MD5STEP(F1, a, b, c, d, in[8] + 0x698098d8, 7);
  MD5STEP(F1, d, a, b, c, in[9] + 0x8b44f7af, 12);
  MD5STEP(F1, c, d, a, b, in[10] + 0xffff5bb1, 17);
  MD5STEP(F1, b, c, d, a, in[11] + 0x895cd7be, 22);
  MD5STEP(F1, a, b, c, d, in[12] + 0x6b901122, 7);
  MD5STEP(F1, d, a, b, c, in[13] + 0xfd987193, 12);
  MD5STEP(F1, c, d, a, b, in[14] + 0xa679438e, 17);
  MD5STEP(F1, b, c, d, a, in[15] + 0x49b40821, 22);

  // round 2: F2, input word index advances by 5 (mod 16), starting at 1
  MD5STEP(F2, a, b, c, d, in[1] + 0xf61e2562, 5);
  MD5STEP(F2, d, a, b, c, in[6] + 0xc040b340, 9);
  MD5STEP(F2, c, d, a, b, in[11] + 0x265e5a51, 14);
  MD5STEP(F2, b, c, d, a, in[0] + 0xe9b6c7aa, 20);
  MD5STEP(F2, a, b, c, d, in[5] + 0xd62f105d, 5);
  MD5STEP(F2, d, a, b, c, in[10] + 0x02441453, 9);
  MD5STEP(F2, c, d, a, b, in[15] + 0xd8a1e681, 14);
  MD5STEP(F2, b, c, d, a, in[4] + 0xe7d3fbc8, 20);
  MD5STEP(F2, a, b, c, d, in[9] + 0x21e1cde6, 5);
  MD5STEP(F2, d, a, b, c, in[14] + 0xc33707d6, 9);
  MD5STEP(F2, c, d, a, b, in[3] + 0xf4d50d87, 14);
  MD5STEP(F2, b, c, d, a, in[8] + 0x455a14ed, 20);
  MD5STEP(F2, a, b, c, d, in[13] + 0xa9e3e905, 5);
  MD5STEP(F2, d, a, b, c, in[2] + 0xfcefa3f8, 9);
  MD5STEP(F2, c, d, a, b, in[7] + 0x676f02d9, 14);
  MD5STEP(F2, b, c, d, a, in[12] + 0x8d2a4c8a, 20);

  // round 3: F3, input word index advances by 3 (mod 16), starting at 5
  MD5STEP(F3, a, b, c, d, in[5] + 0xfffa3942, 4);
  MD5STEP(F3, d, a, b, c, in[8] + 0x8771f681, 11);
  MD5STEP(F3, c, d, a, b, in[11] + 0x6d9d6122, 16);
  MD5STEP(F3, b, c, d, a, in[14] + 0xfde5380c, 23);
  MD5STEP(F3, a, b, c, d, in[1] + 0xa4beea44, 4);
  MD5STEP(F3, d, a, b, c, in[4] + 0x4bdecfa9, 11);
  MD5STEP(F3, c, d, a, b, in[7] + 0xf6bb4b60, 16);
  MD5STEP(F3, b, c, d, a, in[10] + 0xbebfbc70, 23);
  MD5STEP(F3, a, b, c, d, in[13] + 0x289b7ec6, 4);
  MD5STEP(F3, d, a, b, c, in[0] + 0xeaa127fa, 11);
  MD5STEP(F3, c, d, a, b, in[3] + 0xd4ef3085, 16);
  MD5STEP(F3, b, c, d, a, in[6] + 0x04881d05, 23);
  MD5STEP(F3, a, b, c, d, in[9] + 0xd9d4d039, 4);
  MD5STEP(F3, d, a, b, c, in[12] + 0xe6db99e5, 11);
  MD5STEP(F3, c, d, a, b, in[15] + 0x1fa27cf8, 16);
  MD5STEP(F3, b, c, d, a, in[2] + 0xc4ac5665, 23);

  // round 4: F4, input word index advances by 7 (mod 16), starting at 0
  MD5STEP(F4, a, b, c, d, in[0] + 0xf4292244, 6);
  MD5STEP(F4, d, a, b, c, in[7] + 0x432aff97, 10);
  MD5STEP(F4, c, d, a, b, in[14] + 0xab9423a7, 15);
  MD5STEP(F4, b, c, d, a, in[5] + 0xfc93a039, 21);
  MD5STEP(F4, a, b, c, d, in[12] + 0x655b59c3, 6);
  MD5STEP(F4, d, a, b, c, in[3] + 0x8f0ccc92, 10);
  MD5STEP(F4, c, d, a, b, in[10] + 0xffeff47d, 15);
  MD5STEP(F4, b, c, d, a, in[1] + 0x85845dd1, 21);
  MD5STEP(F4, a, b, c, d, in[8] + 0x6fa87e4f, 6);
  MD5STEP(F4, d, a, b, c, in[15] + 0xfe2ce6e0, 10);
  MD5STEP(F4, c, d, a, b, in[6] + 0xa3014314, 15);
  MD5STEP(F4, b, c, d, a, in[13] + 0x4e0811a1, 21);
  MD5STEP(F4, a, b, c, d, in[4] + 0xf7537e82, 6);
  MD5STEP(F4, d, a, b, c, in[11] + 0xbd3af235, 10);
  MD5STEP(F4, c, d, a, b, in[2] + 0x2ad7d2bb, 15);
  MD5STEP(F4, b, c, d, a, in[9] + 0xeb86d391, 21);

  // fold the result of this block into the running hash value
  buf[0] += a;
  buf[1] += b;
  buf[2] += c;
  buf[3] += d;
}

// End imported code

/** Compute an MD5 hash value, ignoring endianness
 *
 * The data is consumed as native unsigned words, so the result equals the
 * standard MD5 digest only on hosts whose word layout matches the one MD5
 * prescribes.  The padding (a 0x80 byte, zeros, and the message length in
 * bits stored in the last two words of the final block) follows RFC 1321;
 * including the length keeps inputs that differ only by trailing
 * 0x80 0x00 ... bytes from hashing to the same value.
 *
 * @param buf	the data to be hashed; whole 16-word blocks are passed to
 *		MD5Transform () in place, so it must be word-aligned
 * @param size	the length of the data, in bytes
 * @param hash	(output) the hash value (word-aligned)
 */
static void
md5hash (const unsigned* buf,
	 size_t size,
	 unsigned hash[4])
{
  /** number of bytes in one MD5 input block */
  const size_t blockBytes = 16 * sizeof (unsigned);
  /** total message length, needed for the trailer after size is reduced */
  const size_t total = size;

  // initialize the hash value with the magic constants
  hash[0] = 0x67452301;
  hash[1] = 0xefcdab89;
  hash[2] = 0x98badcfe;
  hash[3] = 0x10325476;

  // first transform the whole blocks
  for (size_t blocks = size / blockBytes; blocks--; buf += 16)
    MD5Transform (hash, buf);
  // only the partial block (possibly empty) remains
  size %= blockBytes;

  // pad the last block with a pattern consisting of 0x80 0x00 0x00 ...
  unsigned lastbuf[16];
  memset (lastbuf, 0, sizeof lastbuf);
  memcpy (lastbuf, buf, size);
  reinterpret_cast<unsigned char*>(lastbuf)[size] = 0x80;

  // the length occupies words 14 and 15; if the data and the 0x80 marker
  // reach into them, the length goes into an extra all-zero block
  if (size >= 14 * sizeof (unsigned)) {
    MD5Transform (hash, lastbuf);
    memset (lastbuf, 0, sizeof lastbuf);
  }

  // message length in bits: low word, then high word
  lastbuf[14] = static_cast<unsigned>(total << 3);
  lastbuf[15] = static_cast<unsigned>(total >> 29);
  MD5Transform (hash, lastbuf);
}

CompactSet::CompactSet (unsigned size,
			unsigned width) :
  StateSet (),
  myNumCollisions (0), myHashSize (0), myHashWidth (width),
  myMajorHash (0), myMinorHash (0)
{
  assert (size > 0);
  assert (width > 0 && width <= 16);
  /* round the hash table size to the closest prime in our table */
  unsigned s;
  for (s = 0; s < ((sizeof primes) / sizeof *primes) - 1; s++)
    if (primes[s] >= size)
      break;
  const_cast<unsigned&>(myHashSize) = primes[s];
}

/** Destructor: release both hash tables */
CompactSet::~CompactSet ()
{
  // free () ignores null pointers, so a table that was never
  // allocated needs no special treatment
  free (myMinorHash);
  free (myMajorHash);
}

/** Open the path file and allocate the zero-filled hash tables.
 *
 * Each entry's hash value is split into a "major" part, the largest
 * multiple of sizeof *myMajorHash bytes that fits in myHashWidth, and a
 * "minor" part holding the remaining bytes; either part may be absent.
 *
 * @return	true if everything succeeded; false if openFile () failed
 *		or memory could not be allocated
 */
bool
CompactSet::init ()
{
  // nothing must have been initialized before
  assert (!myMajorHash && !myMinorHash && !getNumStates ());
  // the major hash data type must be 1<<(1<<n) chars wide
  assert (!((sizeof *myMajorHash) & ((sizeof *myMajorHash) - 1)));

  if (!openFile ())
    return false;

  /** width of a hash entry in the major table, in bytes */
  const size_t major = myHashWidth & ~((sizeof *myMajorHash) - 1);
  /** width of a hash entry in the minor table, in bytes */
  const size_t minor = myHashWidth - major;

  // Pass the entry count and the entry width to calloc () separately:
  // their product does not fit in an unsigned for the larger table sizes,
  // and an allocation sized from a wrapped product would be too small.
  // NOTE(review): the file opened above stays open on the failure paths
  // below, as before -- confirm that the owner closes it.
  if (major &&
      !(myMajorHash = static_cast<unsigned*>(calloc (myHashSize, major))))
    return false;

  if (minor &&
      !(myMinorHash = static_cast<unsigned char*>
	(calloc (myHashSize, minor)))) {
    // do not keep a half-initialized set around
    free (myMajorHash), myMajorHash = 0;
    return false;
  }

  return true;
}

/** Add an encoded state to the set unless its hash value is present.
 *
 * The first myHashWidth bytes of the MD5-style hash of the state are the
 * key.  The table is probed by double hashing; an entry whose bytes are
 * all zero is an empty slot.
 *
 * @param buf	the encoded state (handed to md5hash () as unsigned words)
 * @param size	length of the encoded state, in bytes
 * @return	true if the state was new and has been stored;
 *		false if an equal key was found or the table is full
 */
bool
CompactSet::do_add (const void* buf,
		    size_t size)
{
  if (myHashSize <= getNumStates ())
    return false; // the hash table is full already

  /** The MD5 hash value of the data */
  unsigned hash[4];
  md5hash (static_cast<const unsigned*>(buf), size, hash);
  /** initial hash index */
  unsigned h = hash[0] % myHashSize;
  /** the increment in the linear hash probing sequence */
  const unsigned h2 = hash[1] % (myHashSize - 1) + 1;
  /** width of a hash entry in the major table, in bytes */
  const size_t major = myHashWidth & ~((sizeof *myMajorHash) - 1);
  /** width of a hash entry in the minor table, in bytes */
  const size_t minor = myHashWidth - major;

  /** The key as a byte string.  Offsets into it are in bytes; adding a
   * byte count to the unsigned* would be scaled by sizeof (unsigned) and
   * point past the end of hash[]. */
  unsigned char key[sizeof hash];
  memcpy (key, hash, sizeof key);
  {
    // An all-zero key could not be told apart from an empty slot, and the
    // state would be reported as new on every visit; perturb such a key.
    // (myHashWidth <= sizeof key is asserted by the constructor.)
    unsigned i = 0;
    while (i < myHashWidth && !key[i])
      i++;
    if (i == myHashWidth)
      key[0] = 1;
  }

  for (unsigned probe = 0; probe < myHashSize;
       probe++, myNumCollisions++,
       // (h + h2) % myHashSize without wrapping around in unsigned
       h = h2 < myHashSize - h ? h + h2 : h - (myHashSize - h2)) {
    // locate both parts of the entry; the index is widened to size_t
    // before scaling, as h * width may not fit in an unsigned
    unsigned char* const slotMajor = major
      ? reinterpret_cast<unsigned char*>(myMajorHash) + size_t (h) * major
      : 0;
    unsigned char* const slotMinor = minor
      ? myMinorHash + size_t (h) * minor
      : 0;

    // the entry is empty only if both parts consist of zero bytes
    bool empty = true;
    for (size_t i = 0; empty && i < major; i++)
      empty = !slotMajor[i];
    for (size_t i = 0; empty && i < minor; i++)
      empty = !slotMinor[i];

    if (!empty) {
      if ((!major || !memcmp (slotMajor, key, major)) &&
	  (!minor || !memcmp (slotMinor, key + major, minor)))
	return false; // the state was already stored
      continue; // occupied by another key -> probe again
    }

    // the hash table entry is empty -> this is a new state

    // initialize the hash table entry at h
    if (major)
      memcpy (slotMajor, key, major);
    if (minor)
      memcpy (slotMinor, key + major, minor);

    // update the statistics, the search list and the trace information
    newState ();

    assert (myPathFileLength == ftell (myPathFile));
    assert (!myOffset || myOffset < myPathFileLength);

    mySearch.push (buf, size, myPathFileLength);
    {
      // record header: packed myOffset and packed state length
      class BytePacker p;
      p.append (myOffset), p.append (size);
      fwrite (p.getBuf (), 1, p.getLength (), myPathFile);
      myPathFileLength += p.getLength () + size;
    }
    // the encoded state follows its header
    fwrite (buf, 1, size, myPathFile);
    return true;
  }

  /* the hash table is full */
  return false;
}

/** Read an encoded state back from the path file.
 *
 * A record consists of two packed numbers (an offset and the length of
 * the state in bytes) followed by the encoded state itself.
 *
 * @param pos	file offset of the record (must be below myPathFileLength)
 * @param size	(output) length of the encoded state, in bytes
 * @return	the encoded state in a newly allocated array of
 *		ceil (length / sizeof (word_t)) words, allocated with new[]
 */
word_t*
CompactSet::getState (long pos, size_t* size) const
{
  // zero-filled so that a short read leaves defined bytes behind
  unsigned char rbuf[8] = { 0 };
  class ByteUnpacker u (rbuf);
  assert (pos < myPathFileLength);
  fseek (myPathFile, pos, SEEK_SET);
  // read byte-wise: a record close to the end of the file may be shorter
  // than rbuf, and a partially read element would be indeterminate
  fread (rbuf, 1, sizeof rbuf, myPathFile);
  unsigned offset = u.extract ();
  // the unpacker's cursor is reset as in the original; u is unused below
  unsigned len = u.extract (); u.buf = rbuf;
  // round the byte count up to whole words (the rounding term must be
  // added before dividing, not divided on its own)
  const size_t words = (len + (sizeof (word_t) - 1)) / sizeof (word_t);
  // value-initialized, so the padding bytes of the last word are zero
  word_t* state = new word_t[words] ();
  // skip the two packed numbers at the start of the record
  fseek (myPathFile,
	 pos + BytePacker::size (offset) + BytePacker::size (len), SEEK_SET);
  fread (state, 1, len, myPathFile);
  *size = len;
  return state;
}

/** Take the next state from the search list.
 * @param tail	passed through to mySearch.pop ()
 * @param size	(output) passed by address to mySearch.pop ()
 * @return	the result of mySearch.pop (), or 0 if the list is empty
 */
word_t*
CompactSet::pop (bool tail, size_t& size)
{
  // myOffset is handed to the search list together with tail and size
  return mySearch.empty ()
    ? 0
    : mySearch.pop (tail, myOffset, &size);
}
