#ifndef __INDEX_H__ /* file wrapper */
#define __INDEX_H__
/*
 * Jeffrey Friedl
 * Omron Corporation
 * Nagaokakyoshi 617, Japan
 *
 * jfriedl@nff.ncl.omron.co.jp
 *
 * This work is placed under the terms of the GNU General Public License
 * (the "GNU Copyleft").
 */

#ifndef __PACKED_LIST_H__
# ifndef __SYSTEM_H__
#   include "system.h"
# endif
# if defined(_HAVE_SYS_STDTYPES_H_)
#  include <sys/stdtypes.h>
# else
#  include <sys/types.h>
# endif
#include "packed_list.h"
#endif
#include "virtfile.h"

/*
 * Identification values for an index.  The header of struct index has
 * magic, version_major and version_minor fields that are commented as
 * holding exactly these three values.
 */
#define INDEX_MAGIC                 0x6a647800   /* 'j' 'd' 'x' 0, i.e. "jdx\0" */ 
#define INDEX_VERSION_MAJOR         1
#define INDEX_VERSION_MINOR         2

/*
 * Jeffrey Friedl, Omron Corporation.
 * jfriedl@nff.ncl.omron.co.jp
 * October 1993
 *
 * Conceptually, following the index for a character will yield the lines
 * in the text file which contain that character.
 * 
 * One more level closer to reality (how it's actually implemented), this
 * is returned as an array of pointers (to each line) and a count indicating
 * how many elements in the array (the array is not null-terminated).
 *
 * One more level closer, the array elements aren't really pointers, but
 * offsets from the start of the file (or from the start of the memory
 * into which the file's been loaded).
 *
 * One more level closer, each element doesn't actually hold the offset into
 * the file, but the _difference_ from the previous offset (the first
 * element actually holding the real offset into the file, as the "previous"
 * offset of the first is zero).
 *
 * One more level closer, it's really not an array of values (differences)
 * but the appropriate number of sequential sets of value-indicating bytes.
 * These are implemented via "packed_list.h".
 */


/*
 * IndexOffset: a byte distance measured from the very start of the index
 * (whether the index lives in a file or in allocated memory) to some
 * location inside that index.  Values of this type are stored in the
 * index itself.
 */
typedef unsigned long IndexOffset;

/*
 * TextOffset: a byte distance measured from the start of the text (file
 * or memory image) to some location within the text, such as the start
 * of a line.  Like IndexOffset, these are found only in the index itself.
 */
typedef unsigned long TextOffset;

/*
 * Pointer -> offset.  Given the base pointer of the in-memory index (or
 * text) and a pointer to somewhere inside it, these two macros yield the
 * corresponding xxxxOffset value.  Both pointers are viewed as byte
 * pointers, so the result is a byte count.
 */
#define makeIndexOffset(IndexBase, IndexPtr) \
    ((IndexOffset)((const unsigned char *)(IndexPtr) - \
		   (const unsigned char *)(IndexBase)))

#define makeTextOffset(TextBase, TextPtr) \
    ((TextOffset)((const unsigned char *)(TextPtr) - \
		  (const unsigned char *)(TextBase)))

/*
 * Offset -> pointer.  Steps OFFSET bytes past the pointer BASE and casts
 * the resulting address to the pointer type TYPE.
 */
#define realptr(base, offset, type) \
    ((type)((const unsigned char *)(base) + (offset)))


/*
 * EUC Japanese are double-byte characters, each with the high bit set.
 * When we find one of these, we look at it as a HI byte and a LO byte,
 * with the high bits cleared (thereby setting their possible range from
 * [0x80 - 0xff]  to  [0x00 - 0x7f], which is a bit more convenient to
 * work with).  For "regular" characters, we use HI=0, LO="regular byte".
 *
 * Basically, we would like to be able to do something like
 *     ListOfLinesForOneCharacter = index[HI][LO]
 * but that would require a larger array than we want, since well over half
 * of the possible codes aren't used.
 *
 * So I compromise a bit... I keep the [HI] part (all 128 slots), but each
 * of those 128 slots is, rather than 128 slots for [LO], some variable
 * number of slots along with info indicating how many are there.
 *
 * If, for example, the lowest LO for [HI=123] is 10 and the highest LO
 * used is 20, I would keep 11 slots with first_lo of 10 and end_lo of 21
 * (end_lo is one past the highest LO: the lookup routines reject any
 * lo >= end_lo).
 *
 * The ListOfLinesForOneCharacter is logically an array of pointers,
 * so my index structure might look something like
 *
 *   struct
 *   {
 *         int first_lo, end_lo;
 *         unsigned char *lists_of_lines[];  <--- indexed by (LO-first_lo)
 *         unsigned list_counts[];           <--- indexed by (LO-first_lo)
 *   } hi[128];
 *
 * But rather than use "char *foo[]", I use IndexOffset so that it can make
 * sense in core or on disk.
 */
/*
 * Header of an index.  The fixed-size part declared here is followed by
 * (indexsize - sizeof(struct index)) further bytes holding the arrays that
 * the IndexOffset members of hi[] point at; every IndexOffset is counted
 * in bytes from the start of this structure.
 */
struct index
{
    unsigned magic;                 /* INDEX_MAGIC */
    unsigned short version_major;   /* INDEX_VERSION_MAJOR */
    unsigned short version_minor;   /* INDEX_VERSION_MINOR */

    unsigned indexsize;	  /* size of complete index, including this header */
    unsigned linecount;   /* FYI, number of lines in the file indexed */
    unsigned limitcount;  /* FYI, if char on this many lines, not in index */
    /*
     * Stream that mem_get_index_count() and mem_get_index_list() seek and
     * read on to fetch index data instead of dereferencing memory.
     */
    FILE *FileP;

    /*
     * True when index I has a stream attached (FileP is non-NULL).
     * NOTE(review): presumably this marks an index whose body stays on
     * disk and is read on demand -- confirm against index.c.
     */
    #define IsMemIndex(I)   ((I)->FileP != NULL)
       
    time_t   st__mtime;	  /* st_mtime of file indexed */

    /* the real index: one entry per HI byte (high bit cleared) */
    struct
    {
	/* lowest LO that has a slot for this HI */
	unsigned char first_lo;
	/* exclusive upper bound: the accessors reject any lo >= end_lo */
	unsigned char end_lo;
	/* offset of an array of elementcount, indexed by (LO - first_lo) */
	IndexOffset listcount;
	/*
	 * offset of an array of IndexOffset, indexed by (LO - first_lo);
	 * each element is in turn the offset of that character's list
	 */
	IndexOffset shifted_lo;
    } hi[128];
    /* the rest of the index follows... (indexsize-sizeof(index)) bytes */
};

/*
 * elementcount is the type of the per-character counts fetched by
 * get_index_count() and mem_get_index_count().  It is an unsigned short
 * unless the build defines USE_SHORT_INDEX_COUNTS as 0, in which case it
 * is a plain unsigned.
 */
#ifndef USE_SHORT_INDEX_COUNTS
#define USE_SHORT_INDEX_COUNTS 1
#endif
#if USE_SHORT_INDEX_COUNTS
  typedef unsigned short elementcount;
#else
  typedef unsigned elementcount;
#endif
/*
 * SKIPPED_COUNT is the all-ones value of elementcount and MAX_COUNT is the
 * value just below it.
 * NOTE(review): presumably SKIPPED_COUNT is a sentinel stored for
 * characters left out of the index (see limitcount) and MAX_COUNT is the
 * largest genuine count -- confirm against index.c.
 */
#define SKIPPED_COUNT ((elementcount)~0)
#define MAX_COUNT (SKIPPED_COUNT - 1)

/*
 * When not compiling with a GNU-compatible compiler, make the
 * __volatile__ and __inline__ spellings expand to nothing (unless they
 * have already been defined) so declarations using them still compile.
 */
#if !defined(__GNUC__)
#  if !defined(__volatile__)
#    define __volatile__ /*nothing; for use with volatile functions */
#  endif
#  if !defined(__inline__)
#    define __inline__ /*nothing; for use with inline functions */
#  endif
#endif

/*
 * Fetch the count recorded for the character (HI, LO) from an index whose
 * data is addressable in memory.  More or less does the virtual
 *     *dest = index[hi].list_of_counts[lo]
 *
 * Returns 1 and stores the count through DEST when the index has a slot
 * for the character.  Returns 0, leaving *DEST untouched, when HI is
 * beyond the hi[] table or LO is outside [first_lo, end_lo) for that HI.
 */
static __inline__ int
get_index_count(const struct index *i, unsigned char hi,
		unsigned char lo, elementcount *dest)
{
    const elementcount *counts;

    /* HI is a full byte but hi[] is smaller: never read past the table */
    if (hi >= sizeof(i->hi) / sizeof(i->hi[0]))
	return 0; /* no such HI in the index */

    /* make sure that the LO falls into the range of lo's for the HI */
    if (lo < i->hi[hi].first_lo || lo >= i->hi[hi].end_lo)
	return 0; /* no such character in the index */

    /* listcount is the offset of this HI's count array, one per LO slot */
    counts = realptr(i, i->hi[hi].listcount, const elementcount *);
    *dest = counts[lo - i->hi[hi].first_lo];
    return 1;
}


/*
 * Stream-backed counterpart of get_index_count(): fetches the count for
 * the character (HI, LO) by seeking i->FileP to the slot's position and
 * reading one elementcount from it, rather than dereferencing memory.
 *
 * Returns 1 and stores the count through DEST when the index has a slot
 * for the character.  Returns 0, leaving *DEST untouched, when HI is
 * beyond the hi[] table or LO is outside [first_lo, end_lo) for that HI.
 * A failed seek or read is reported through die().
 */
static __inline__ int
mem_get_index_count(const struct index *i,
		    unsigned char hi,
		    unsigned char lo,
		    elementcount *dest)
{
    long loc;
    int rc;

    /* HI is a full byte but hi[] is smaller: never read past the table */
    if (hi >= sizeof(i->hi) / sizeof(i->hi[0]))
	return 0; /* no such HI in the index */

    /* make sure that the LO falls into the range of lo's for the HI */
    if (lo < i->hi[hi].first_lo || lo >= i->hi[hi].end_lo)
	return 0; /* no such character in the index */

    /*
     * Position of the wanted slot: start of this HI's count array plus
     * (LO - first_lo) elements.  Plain integer arithmetic gives the same
     * value as indexing off a null base pointer, without forming pointers
     * from address zero.
     */
    loc = (long)i->hi[hi].listcount +
	  (long)(lo - i->hi[hi].first_lo) * (long)sizeof(elementcount);

    /*
     * The message's trailing "returned %ld" needs the fseek() result as a
     * matching argument.
     * NOTE(review): fp is handed to a %x conversion and %n is used; die()'s
     * formatter is not visible here -- confirm it accepts both.
     */
    rc = fseek(i->FileP, loc, SEEK_SET);
    if (rc != 0)
	die("bad fseek to %ld (fp=%x) at %s line %d, returned %ld: %n\n",
	    loc, i->FileP, __FILE__, __LINE__, (long)rc);

    /* a short read would leave *dest undefined, so treat it as fatal */
    if (fread(dest, sizeof(*dest), 1, i->FileP) != 1)
	die("bad read from fp=%x at %s line %d: %n\n",
	    i->FileP, __FILE__, __LINE__);
    return 1;
}


static __inline__ int
get_index_list(const struct index *i,
	       unsigned char hi,
	       unsigned char lo,
	       const unsigned char **dest)
{
    /* make sure that the LO falls into the range of lo's for the HI */
    if (lo < i->hi[hi].first_lo || lo >= i->hi[hi].end_lo)
	return 0; /* no such character in the index */
    else {
        IndexOffset listoffset =
	    realptr(i, i->hi[hi].shifted_lo, IndexOffset *)
		[lo - i->hi[hi].first_lo];
	*dest = realptr(i, listoffset, unsigned char *);
	return 1;
    }
}

/*
 * Stream-backed counterpart of get_index_list(): reads from i->FileP the
 * IndexOffset stored for the character (HI, LO) and stores it through
 * DEST.  Unlike get_index_list(), the result is the list's offset within
 * the index, not a pointer.
 *
 * Returns 1 on success.  Returns 0, leaving *DEST untouched, when HI is
 * beyond the hi[] table or LO is outside [first_lo, end_lo) for that HI.
 * A failed seek or read is reported through die().
 */
static __inline__ int
mem_get_index_list(const struct index *i,
		   unsigned char hi,
		   unsigned char lo,
		   IndexOffset *dest)
{
    long loc;
    int rc;

    /* HI is a full byte but hi[] is smaller: never read past the table */
    if (hi >= sizeof(i->hi) / sizeof(i->hi[0]))
	return 0; /* no such HI in the index */

    /* make sure that the LO falls into the range of lo's for the HI */
    if (lo < i->hi[hi].first_lo || lo >= i->hi[hi].end_lo)
	return 0; /* no such character in the index */

    /*
     * Position of the wanted slot: start of this HI's offset table plus
     * (LO - first_lo) elements.  Plain integer arithmetic gives the same
     * value as indexing off a null base pointer, without forming pointers
     * from address zero.
     */
    loc = (long)i->hi[hi].shifted_lo +
	  (long)(lo - i->hi[hi].first_lo) * (long)sizeof(IndexOffset);

    /*
     * The message's trailing "returned %ld" needs the fseek() result as a
     * matching argument.
     * NOTE(review): fp is handed to a %x conversion and %n is used; die()'s
     * formatter is not visible here -- confirm it accepts both.
     */
    rc = fseek(i->FileP, loc, SEEK_SET);
    if (rc != 0)
	die("bad fseek to %ld (fp=%x) at %s line %d, returned %ld: %n\n",
	    loc, i->FileP, __FILE__, __LINE__, (long)rc);

    if (fread(dest, sizeof(*dest), 1, i->FileP) != 1)
	die("bad read from fp=%x at %s line %d: %n\n",
	    i->FileP, __FILE__, __LINE__);
    return 1;
}

/*
 * other things defined in index.c
 *
 * NOTE(review): the implementations are not visible from this header, so
 * the notes below are inferred from names and signatures only -- confirm
 * against index.c.
 */

/*
 * Presumably builds an index for the text in V; FLAGS presumably takes the
 * INDEX_REPORT_* bits defined below.  The meaning of PERCENT is not
 * evident from this header.
 */
extern struct index *
create_index(VirtFile *v, unsigned percent, unsigned flags);


/* These flags must be distinct from those in loadfile.h */
#define INDEX_REPORT_PROGRESS    0x00000001
#define INDEX_REPORT_SKIPPED     0x00000002
#define INDEX_REPORT_STATS	 0x00000004

/*
 * File-level operations on an index, all taking the index file's name.
 * The meaning of read_index_file()'s TRY argument and of the int return
 * values is not evident from this header.
 */
extern int is_index_file(const char *filename);
struct index *read_index_file(const char *filename, int try, unsigned flags);
struct index *mem_read_index_file(const char *filename);
int write_index_file(const char *filename, const struct index *i);

/*
 * Remove any macro definition of __inline__ (such as the empty fallback
 * this header defines for non-GNU compilers) now that the inline functions
 * above have been declared.
 */
#undef __inline__

#endif /* file wrapper */
