V10/cmd/bcp/Text.h

/* Copyright (c) 1989, 1990 AT&T --- All Rights Reserved.              */
/* THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF AT&T.                */
/* The copyright notice does not imply actual or intended publication. */
/* AUTHORS:                                            */
/*     H. S. Baird - ATT-BL MH - first versions        */

/* Text.h - typedefs, constants, and function declarations for document-images
	(see Text.c for companion functions.)
   INCLUDES
	requires prior:
		#include "stdocr.h"
 */

#define dbg_fwrb_toa (F)	/* err("%s",R_toa()) for each record written */
#define dbg_frdb_toa (F)	/* err("%s",R_toa()) for each record read */

#define DIM_VERSION (0)		/* current version no. of Dim-file format */

#include "Bfeats.h"

#define Ident int	/* identification bits [MUST BE >=32 bits] */

#define fwri_Ident(F,V) fwri_uint4((F),(V))
#define frdi_Ident(F) frdi_uint4(F)

/* Ident bits */
/* identifies external file record type(s) */
#define IsPage		04000000000
#define IsBlock		02000000000
#define IsTxtln		01000000000
#define IsWord		00020000000
#define IsChar		00400000000
#define IsBlob		00200000000
#define IsRun		00100000000	/* should be IsRuns or IsLag */
#define IsRuns		(IsRun)
#define IsInterp	00040000000	/* Char interpretation */
#define IsBdy		00010000000
#define IsShapes	00004000000
#define IsLag		00002000000
#define IsBfeats	00001000000
#define IsSfeats	00000400000
#define IsWordInterp	00000200000
/* Runs:  set in Blob records' ids.
   At most one of these may be set in main memory.
   Only Runs_ff or Runs_g4 may be set in peripheral file format. */
#define Runs_f		00000010000	/* Runs are in list, starting *.r.f */
#define Runs_ff		00000020000	/* RunFs are in array, at *.r.ff */
#define Runs_seek	00000040000	/* Runs are still in file, at *.seek */
#define Runs_g4		00000100000	/* Runs are in CCITT Group 4 format */

/* CCITT Group 4 format in peripheral files consists of (a) an unsigned long count,
   then (b) that many bytes of Group 4 encoding exactly as in CCITT
   Recommendation T.6, except that no EOFB code is appended, since the end of
   the bitmap can be detected using the bounding box of the owning Blob.
   Instead, the last scan line of the bitmap is merely padded to the nearest
   full byte with '0' bits.  This compresses the representation by about
   a factor of 8 on average, compared to RunF/RunFS encoding. */

#define IsALL (IsPage|IsBlock|IsTxtln|IsWord|IsChar|IsBlob|IsRun|IsBdy|IsInterp|IsShapes|IsLag|IsBfeats|IsSfeats|IsWordInterp)
#define IsNONE		0


/* Enable optional debugging-support code to maintain a count of selected record
   types that are allocated using alloc_* free_* and dup_* functions.  The counts
   are in units of records, not bytes.  By design, has no effect at all on
   correctness.  */
#define ALLOC_CENSUS (0)

#if ALLOC_CENSUS

typedef struct Census {
	int Page_mny;
	int Block_mny;
	int Txtln_mny;
	int Word_mny;
	int Char_mny;
	int Blob_mny;
	int Run_mny;
	int Interp_mny;
	int Bfeats_mny;
	int BMask_mny;
	} Census;

#define Init_Census {0,0,0,0,0,0,0,0,0,0}
#if MAIN
Census empty_Census = Init_Census;
Census _CENSUS = Init_Census;
#else
extern Census empty_Census;
extern Census _CENSUS;
#endif

#define alloc_census(id,n) _CENSUS./**/id/**/_mny += (n)
#define free_census(id,n) _CENSUS./**/id/**/_mny -= (n)
#define err_census(S,C) err("%s P%d B%d l%d w%d c%d b%d r%d i%d bf%d bm%d",\
	S,(C)->Page_mny,(C)->Block_mny,(C)->Txtln_mny,(C)->Word_mny,\
	(C)->Char_mny,(C)->Blob_mny,(C)->Run_mny,(C)->Interp_mny,\
	(C)->Bfeats_mny,(C)->BMask_mny )
#define err_census_all err_census("allocated: ",&(_CENSUS))
#define err_census_rec(rp) { \
	Census *cs;  cs = (Census *)census_rec((rp)); \
	err_census(ident_toa((rp)->ident),cs); \
	}
#else

#define alloc_census(i,n)
#define free_census(i,n)
#define err_census(s,c)
#define err_census_all
#define err_census_rec(r)

#endif


/* Most record types can own an ASCII label, which is simply a
   '\0'-terminated string.  Its uses are varied. */
#define MAX_LABEL_LEN 128	/* maximum no. characters in a label string */

#if FWRI

#if dbg_fwrb_toa
#define fwrb_label(F,L) { \
	fwri_str((F),(L)); \
	err("fwrb_label: \"%s\"",(L)); \
	}
#else
#define fwrb_label(F,L) { \
	fwri_str((F),(L)); \
	}
#endif

#else

#if dbg_fwrb_toa
#define fwrb_label(F,L) { \
	fputs((L),(F)); \
	fputc('\0',(F)); \
	err("fwrb_label: \"%s\"",(L)); \
	}
#else
#define fwrb_label(F,L) { \
	fputs((L),(F)); \
	fputc('\0',(F)); \
	}
#endif

#endif

char *frdb_label();

/* A boundary is an ordered list of vertices.
   In some uses, it is assumed to close:  in this case, the first point
   is not repeated at the end.
   `vn' counts the no. vertices.  bdy_trace omits consecutive duplicates and
   compresses horizontal and vertical runs.
   `per' counts the no. of pixels on the 8-connected boundary.  An attempt has been
   made not to count consecutive duplicate pixels, but this may be buggy.
   `ren' counts the number of run-ends touched.  Each run contributes two ends,
   even if it is one pixel long.  The sum of ren-counts among all bdys for a blob
   should equal exactly twice the no. runs.
   Note that vn<=per and vn<=2*ren, and (probably) ren<=per.
   A ``smoothed'' version (courtesy of John Hobby) may be given in s[],
   expressed in fractional pixels.
   */
typedef struct Bdy {
	Ident ident;	/* shows type of boundary */
	Bbx bx;		/* bounding box (usually relative to blob's bx.a) */
	long per;	/* no. pixels in 8-connected perimeter */
	int ren;	/* no. run-ends touched */
	int vn;		/* no. distinct vertices in v[] */
	Sp *v;		/* array of vn+1 vertices, v[0]==v[vn] (malloc space) */
	short fr;	/* fraction of pixels used in smoothed outline */
	int sn;		/* no. distinct smoothed vertices in s[] */
	Sp *s;		/* array of sn+1 vertices, s[0]==s[sn] (malloc space) */
	int an;		/* no. vertices in polygonal approximation */
	Sp **ap;	/* approx'n: array of an ptrs into v[] (in malloc space) */
	float err;	/* error tolerance used for approximation */
	int hn;		/* no. vertices in convex hull of polygonal approx'n */
	Sp ***hpp;	/* convex hull: array of hn ptrs into ap[] (malloc) */
	struct Bdy *n;	/* for use when a member of a linked-list */
	} Bdy;

#define Init_Bdy {IsBdy,Init_Bbx,0,0,0,NULL,1,0,NULL,0,NULL,0.0,0,NULL,NULL}
#if MAIN
Bdy empty_Bdy = Init_Bdy;
#else
extern Bdy empty_Bdy;
#endif

#define Bdy_verts	00000000001	/* vertices */
#define Bdy_approx	00000000002	/* polygonal approx'n */
#define Bdy_hull	00000000004	/* convex hull */
#define Bdy_ALL		(Bdy_verts|Bdy_approx|Bdy_hull)
#define Bdy_ccw		00000000100	/* winding order is counter-clockwise*/
#define Bdy_half	00000000200	/* uses half-pixel boundary points */

/* A boundaries-set is an ordered list of boundaries.
   In some uses, they are used to enclose a connected region:  in this case,
   all are closed, and the first is conventionally the exterior and the others
   are interior boundaries.  The interior is then always to the left
   of the boundary:  that is, the exterior boundary is oriented counter-
   clockwise, and the interior boundaries clockwise */
typedef struct Bdys {
	int mny;	/* no. boundaries */
	long per;	/* perimeter of all bdys */
	Bdy *b;		/* array of boundaries (malloc space) (sometimes first) */
	} Bdys;

#define Init_Bdys {0,0,NULL}
#if MAIN
Bdys empty_Bdys = Init_Bdys;
#else
extern Bdys empty_Bdys;
#endif

/* A boundary edge is an ordered pair of vertices along a boundary.  It implicitly
   describes an 8-connected sequence of pixels from a to b, inclusive.  Also
   used for straight-line approximations to the set of pixels, and the convex hull
   of such an approximation. */
typedef struct BdyEdge {
	Bdy *byp;	/* boundary to which it belongs */
	Sp *ap,*bp;	/* ptrs into byp->v */
	long per;	/* perimeter:  no. 8-connected pixels */
	Pp ctr;		/* centroid */
	Radians ang;	/* ls-fitted angle (directed roughly from a to b) */
	Pp a;		/* endpoints a & b (with sub-pixel precision) */
	Pp b;
	} BdyEdge;

#define Init_BdyEdge {NULL,NULL,NULL,0,Init_Zero_Pp,0.0,Init_Zero_Pp,Init_Zero_Pp}
#if MAIN
BdyEdge empty_BdyEdge = Init_BdyEdge;
#else
extern BdyEdge empty_BdyEdge;
#endif

/* Ordered set of BdyEdges */
typedef struct BdyEdges {
	int mny;
	BdyEdge **pa;	/* NULL-terminated array of pointers to BdyEdges */
	} BdyEdges;

#define Init_BdyEdges {0,NULL}
#if MAIN
BdyEdges empty_BdyEdges = Init_BdyEdges;
#else
extern BdyEdges empty_BdyEdges;
#endif

/* Moments of a region (or boundary-list) of pixels */

typedef struct Moments {
	/* 0th moment */
	int M00;	/* area: sum of 1 */
	/* 1st moments */
	int M10;	/* sum of xi */
	int M01;	/* sum of yi */
	Pp c;		/* centroid: M10/M00, M01/M00 */
	/* 2nd moments (relative to centroid) */
	float M20;	/* sum of rxi*rxi */
	float M11;	/* sum of rxi*ryi */
	float M02;	/* sum of ryi*ryi */
	Radians a;	/* orientation angle, in [-PI/2,PI/2) (radians) */
	Pp d;		/* directional vector of principal axis */
	} Moments;

#define Init_Moments {0,0,0,{0.0,0.0},0.0,0.0,0.0,0.0,{0.0,0.0}}
#if MAIN
Moments zero_Moments = Init_Moments;
#else
extern Moments zero_Moments;
#endif

/* functions in Text.c */
Bdy *alloc_bdy();
Bdys *alloc_bdys();

/* functions in Bdy.c */
Bdys *dup_bdys_etc();
Bdy *dup_bdy_etc();
Bdys *boundaries();
char *moments_toa();
Moments *bdy_moments();
boolean fit_bdyedge();
BdyEdge *dup_bdyedge();
BdyEdge *append_bdyedge();
remove_bdyedge();
free_bdyedges();
BdyEdges *dup_bdyedges();

/* Each Run is initially inserted into a `line set', owned by its scan Line.
   Later, as connections are discovered, it joins a `tree set'.
   When the forest of trees of which it is a part is finally complete, the Run
   is removed from its `line set', and added to a `blob set'.
   */

typedef struct Run {	/* internal (main memory) record */
	Scoor y, xs, xe;	/* coordinates of black interval (y,[xs,xe]) */
	struct Run *n;		/* line & blob sets: next Run */
	unsigned short ad, bd;	/* tree set: above,below degrees (no. conn'd)*/
	struct Run *ac, *bc;	/* tree set: above,below leftmost connections */
	union {	struct Tree *o;	/* tree set: owner Tree */
		int no;		/* blob set: sequence no. 0,1,... in set */
		} u;		/* (overlain fields) */
	} Run;

#define Init_Run {0,0,0,NULL,0,0,NULL,NULL}
#if MAIN
Run empty_Run = Init_Run;
#else
extern Run empty_Run;
#endif

/* Peripheral file format.  `ac' and `bc' are relative to the position
   of this run in the canonical run order. */
typedef struct RunF {	/* external (peripheral file) record (full size) */
	Scoor y, xs, xe;	/* coordinates of black interval (y,[xs,xe]) */
	unsigned short ad, bd;	/* above,below degrees */
	unsigned short ac, bc;	/* above,below leftmost connections (indices) */
	} RunF;

#define Init_RunF {0,0,0,0,0,0,0}
#if MAIN
RunF empty_RunF = Init_RunF;
#else
extern RunF empty_RunF;
#endif

#define fwri_RunF(F,P) { \
	fwri_Scoor((F),(P)->y); \
	fwri_Scoor((F),(P)->xs); \
	fwri_Scoor((F),(P)->xe); \
	fwri_uint2((F),(P)->ad); \
	fwri_uint2((F),(P)->bd); \
	fwri_uint2((F),(P)->ac); \
	fwri_uint2((F),(P)->bc); \
	}

#define frdi_RunF(F,P) ( feof(F)? 0 : ( \
	(P)->y=frdi_Scoor(F), \
	(P)->xs=frdi_Scoor(F), \
	(P)->xe=frdi_Scoor(F), \
	(P)->ad=frdi_uint2(F), \
	(P)->bd=frdi_uint2(F), \
	(P)->ac=frdi_uint2(F), \
	(P)->bc=frdi_uint2(F), \
	(ferror(F)? -errno: 1) ) )

/* In `well-behaved' text, the overwhelming majority of Blobs are small
   enough that all their Runs can be encoded using character data fields,
   a factor of two saving, which is important since a dense IEEE proceedings
   page blob file would otherwise require 2.3Mbytes */
typedef struct RunFS {	/* external (peripheral file) record (small size) */
	unsigned char y, xs, xe;/* coordinates of black interval (y,[xs,xe]) */
	unsigned char ad, bd;	/* above,below degrees */
	unsigned char ac, bc;	/* above,below leftmost connections (indices) */
	} RunFS;

#define Init_RunFS {0,0,0,0,0,0,0}
#if MAIN
RunFS empty_RunFS = Init_RunFS;
#else
extern RunFS empty_RunFS;
#endif

#define fwri_RunFS(F,P) { \
	fwri_uint1((F),(P)->y); \
	fwri_uint1((F),(P)->xs); \
	fwri_uint1((F),(P)->xe); \
	fwri_uint1((F),(P)->ad); \
	fwri_uint1((F),(P)->bd); \
	fwri_uint1((F),(P)->ac); \
	fwri_uint1((F),(P)->bc); \
	}

#define frdi_RunFS(F,P) ( feof(F)? 0: ( \
	(P)->y=frdi_uint1(F), \
	(P)->xs=frdi_uint1(F), \
	(P)->xe=frdi_uint1(F), \
	(P)->ad=frdi_uint1(F), \
	(P)->bd=frdi_uint1(F), \
	(P)->ac=frdi_uint1(F), \
	(P)->bc=frdi_uint1(F), \
	(ferror(F)? -errno: 1) ) )

/* Set of runs.  PROPOSED NEW FORMAT.  Not yet incorporated widely. */
typedef struct Runs {
	Ident ident;	/* IsRuns & Runs_fi, Runs_ff, Runs_fs, or Runs_sk flags */
	int mny;	/* no. runs */
	union {	/* access to runs */
		struct Run *fi;	  /* first run of singly-linked list */
		struct RunF *ff;  /* top of RunF[mny] array */
		struct RunFS *fs; /* top of RunFS[mny] array */
		long sk;	  /* seek(F,seek,0) will find them in file F */
		} r;
	} Runs;

#define Init_Runs {IsRuns,0}	/* NOTE: can't initialize union */
#if MAIN
Runs empty_Runs = Init_Runs;
#else
extern Runs empty_Runs;
#endif

/* INTERNAL management */

#if !MAIN
extern
#endif
struct {
	int incr;		/* size of each pool[i] */
	int pools;		/* no. of pools allocated */
	Run **pool;		/* malloc space Run pool[pools][0..incr-1] */
	int next;		/* the next avail Run is:  pool[pools-1][next] */
	Run *free;		/* head of free lifo list (NULL if none) */
	Run *cur;		/* most-recently allocated Run */
	int total;		/* total no. ever allocated */
	boolean dbg;
	} _RunPool
#if MAIN
	= {0,0,NULL,0,NULL,NULL,0,F}
#endif
;

/* Run management routines (Text.c) */
boolean	alloc_run_pool();
	free_run_pool();
Run	*hard_alloc_run();
	err_run();
	err_runb();
	err_runf();
	err_runfs();
	err_run_stats();

/* Allocate a Run from the RunPool (returns (Run *)) -- mostly inline */
#define alloc_run() ( _RunPool.total++, (_RunPool.free!=NULL)? \
	(_RunPool.cur=_RunPool.free,_RunPool.free=_RunPool.cur->n, \
		*(_RunPool.cur)=empty_Run,_RunPool.cur): \
	( (_RunPool.next<_RunPool.incr)? \
	  (_RunPool.cur=_RunPool.pool[_RunPool.pools-1]+(_RunPool.next++), \
		*(_RunPool.cur)=empty_Run,_RunPool.cur): \
	  hard_alloc_run() ) )

/* Free a Run back into the RunPool -- entirely inline */
#define free_run(rp) { (rp)->n = _RunPool.free; _RunPool.free = (rp); }

/* EXTERNAL file format:
   If BlobF.runs is zero, then conventionally the Runs have simply been omitted.
   The RunF.y, RunF.xs, & RunF.xe coordinates are offsets from BlobF.bx.a 
   (their blob's left-top corner).  RunF.ac & RunF.bc index into an array of
   only those RunF records belonging to the current BlobF, in ascending
   lexicographic order on (RunF.y,RunF.xs) -- so that they are in the range
   [0,BlobF.runs-1].
 IMPROVEMENTS:
  */

/* some subroutines are too lazy to handle indefinitely large blobs */
#define Runs_Max 10000	

/* A Blob is (formally) a maximal 8-connected set of black pixels.
   The connectivity algorithm finds them in strictly increasing order on
   (y,xe) of its Run with highest (y,xe).
   */

typedef struct Blob {	/* internal (main memory) record */
	Ident ident;	/* identification bits */
	Seq no;		/* blob sequence no */
	Bbx bx;
	long area;
	long per;
	struct Blob *n;	/* free set: next blob */
	Merit m;	/* Only used locally (not for peripheral file) */
	Runs *rsp;	/* runs (not yet used) */
	Bdys *bdsp;	/* boundaries (in malloc space); NULL if none */
	/* presently in use (but planned to be replaced by Runs) */
	int runs;
	union {	/* to find runs */
		struct Run *f;		/* blob set: first run */
		struct RunF *ff;	/* top of RunF array */
		long seek;		/* seek(f,seek,0) will find them */
		} r;
	} Blob;

#define Init_Blob {IsBlob,0,Init_Bbx,0,0,NULL,0.0,NULL,NULL,0,}
#if MAIN
Blob empty_Blob = Init_Blob;
#else
extern Blob empty_Blob;
#endif

typedef struct Blobs {	/* Blob set */
	int mny;	/* the number of pointers in set */
	Blob **bpa;	/* pts to NULL-terminated array[mny+1] of pointers */
	} Blobs;

#define Init_Blobs {0,NULL}
#if MAIN
Blobs empty_Blobs = Init_Blobs;
#else
extern Blobs empty_Blobs;
#endif

/* Singly-linked list of Blobs.  Only forward `next' links Blob.n are used. */
typedef struct Blobl {	/* Blob list */
	int mny;	/* the number in set */
	Blob *fi;	/* to first */
	Blob *la;	/* to last */
	} Blobl;

#define Init_Blobl {0,NULL,NULL}
#if MAIN
Blobl empty_Blobl = Init_Blobl;
#else
extern Blobl empty_Blobl;
#endif

typedef struct BlobF {	/* external file format */
	Ident ident;	/* identification bits: IsBlob must be set */
	Bbx bx;
	long area;
	long per;
	int runs;	/* no. runs to follow */
	short bdys;	/* no. bdys to follow */
	} BlobF;

/* Blob identification bits */
#define Blob_lm		00000000001	/* touches left margin */
#define Blob_rm		00000000002	/* touches right margin */
#define Blob_tm		00000000004	/* touches top margin */
#define Blob_bm		00000000010	/* touches bottom margin */
#define Blob_chopt	00000000020	/* chopped (at the top) */
#define Blob_chopb	00000000040	/* chopped (at the bottom) */
#define Blob_chopl	00000002000	/* chopped (at the left) */
#define Blob_chopr	00000004000	/* chopped (at the right) */
#define Blob_small	00000000200	/* its runs (can be) compressed x2 */
#define Blob_local	00000000400	/* unassigned:  avail for local pgm use */

/* INTERNAL management: */

int hi_blob_no;		/* current highest blob no */

/* Blobs are allocated from a pool of free ones */
int blob_max;
Blob *blob_pool;
Blob blob_fr;		/* head of list of free blobs */
int blob_fr_mny;
int blob_hi;		/* high-water mark in blob pool */
int blob_chopped;	/* total no. of blobs that were chopped */
boolean blob_debug;	/* debug traces? */

/* EXTERNAL file format:
   A Blob file consists of an arbitrary number of:
       BlobF record, followed by BlobF.runs instances of:
           RunF record
   If BlobF.runs is zero, then conventionally the Runs have simply been omitted.
   The RunF.y, RunF.xs, & RunF.xe coordinates are relative offsets from BlobF.bx.a 
   (their blob's left-top corner).  RunF.ac & RunF.bc index into an array of
   only those RunF records belonging to the current BlobF, in ascending
   lexicographic order on (RunF.y,RunF.xs) -- so that they are in the range
   [0,BlobF.runs-1].  If ad(or, bd)==0, the ac(or, bc) is undefined (conn sets
   them conventionally to 0).
   */

/* Blob management routines (conBlob.c) */
Blob	*alloc_blob();
	free_blob();
boolean	alloc_blob_pool();
	free_blob_pool();
Blob	*alloc_pool_blob();
	free_pool_blob();
	out_blob();
        fwrb_blob_etc();
boolean frdb_blob_etc();
boolean	frdb_runfs();
	err_blob();
	err_blob_runs();
	err_blob_runfs();
	err_blob_briefly();
	err_blobf();
	err_blob_stats();
boolean	blob_small();

/* Compute height-above-baseline in ems of Char *cp w.r.t. Txtln *lp,
   on a page of y-resolution res.  The txtln's `basl' & `size' must be set up. */
#define char_bhgt(cp,lp,res) \
	((((cp)->bx.b.y - (lp)->basl)/(double)(res)*INS_PER_PT*(lp)->size))

/* an Interpretation of a Char */
typedef struct Interp {
	Ident ident;
	struct Cl *clp;		
	struct Class *clsp;
	ClassId ci;		/* class id (font, size, name, variant) */
	Merit mshap;		/* shape merit in [0,1] */
	Pts size;		/* implied text size */
	Merit msize;		/* size merit in [0,1] */
	Scoor basl;		/* implied absolute baseline location */
	Merit mbhgt;		/* height-above-baseline merit in [0,1] */
	Merit m;		/* match merit (due to mshap, msize, & mbhgt) */
	Prob p;			/* approximate probability */
	struct Interp *n;	/* next in singly-linked list */
	} Interp;

#define Init_Interp {IsInterp,NULL,NULL,Init_ClassId,0.0,0.0,0.0,0,0.0,0.0,0.0,NULL}
#if MAIN
Interp empty_Interp = Init_Interp;
#else
extern Interp empty_Interp;
#endif

/* Interp.ident flags: */
#define Interp_spelled	00000000001	/* has passed a spelling check */

#define free_interp(i) {free((i)); free_census(Interp,1); }

typedef struct InterpF {
	Ident ident;
	ClassId ci;		/* class id (font, size, name, variant) */
	Merit mshap;		/* shape merit */
	Pts size;		/* implied text size (in points) */
	Merit msize;		/* size merit */
	Scoor basl;		/* implied baseline location */
	Merit mbhgt;		/* height-above-baseline merit */
	Merit m;		/* match merit (due to shp, siz, hgt) */
	} InterpF;

/* a list of interpretations */
typedef struct Interpl {
	short mny;		/* no. in list */
	struct Interp *fi;	/* first in list (mny==0 -> fi==NULL) */
	} Interpl;

#define Init_Interpl {0,NULL}
#if MAIN
Interpl empty_Interpl = Init_Interpl;
#else
extern Interpl empty_Interpl;
#endif

/* a set of interpretations */
typedef struct Interps {
	short mny;		/* no. in set */
	struct Interp **pa;	/* NULL-terminated array of ptrs (malloc spc) */
	Merit m;		/* combined merit (normalized product) */
	} Interps;

#define Init_Interps {0,NULL,0.0}
#if MAIN
Interps empty_Interps = Init_Interps;
#else
extern Interps empty_Interps;
#endif

typedef struct Shapes {
	short mny;	/* no. items in set */
	short alloc;	/* no. items that can fit in array (alloc>=mny) */
	Nb_s *sa;	/* ptr to contiguous array (malloc space) */
	} Shapes;

#define Init_Shapes {0,0,NULL}
#if MAIN
Shapes empty_Shapes = Init_Shapes;
#else
extern Shapes empty_Shapes;
#endif

#define SH_INCR (20)	/* Shapes are allocated by this increment */

#define init_sh(sh) { \
	(sh)->alloc = SH_INCR; \
	if(((sh)->sa=(Nb_s *)malloc((sh)->alloc*sizeof(Nb_s)))==NULL) \
		abort("can't alloc sh->sa[%d]",(sh)->alloc); \
	(sh)->mny = 0; \
	}

#define add_sh(s,sh) { \
	if((sh)->mny==(sh)->alloc) { \
		(sh)->alloc += SH_INCR; \
		if(((sh)->sa=(Nb_s *)realloc((sh)->sa,(sh)->alloc*sizeof(Nb_s)))==NULL) \
			abort("can't alloc (sh)->sa[%d]",(sh)->alloc); \
		}; \
	(sh)->sa[(sh)->mny++] = *(s); \
	}

/* Parameters governing the pseudo-random generation of a Char image
   using a 1st-order statistical model of imaging. */ 
typedef struct RanParms {
	short res_x;	/* -r resolution (scanner pels/inch) */
	short res_y;	/*    (for now, equal to res_x) */
	Pts size;	/* -p size of text */
	Radians skew;	/* -a skew angle */
	Ems bhgt;	/* -b height above baseline */
	float blur;	/* -e blurring std err (scanner pels) */
	float jitter;	/* -j jitter std err (scanner pels) */
	float kern;	/* -k kerning std err (scanner pels) */
	float speckle;	/* -s pel-wise additive noise std err (scanner pels) */
	float thresh;	/* -t threshold for binarization */
	float xscale;	/* -x horizontal scaling (dimensionless) */
	float yscale;	/* -y vertical scaling (dimensionless) */
	} RanParms;
#define Init_RanParms {0,0,0.0,0.0,0.0,0.7,0.0,0.0,0.125,0.25,1.0,1.0}
#if MAIN
RanParms empty_RanParms = Init_RanParms;
#else
extern RanParms empty_RanParms;
#endif

RanParms *alloc_ranparms();
RanParms *dup_ranparms();
char *ranparms_toa();
RanParms *ato_ranparms();
fwrb_ranparms();
int frdb_ranparms();

/* Char - a character:  isolated, elementary symbol of the writing system;
   linguists might call this a `graph' */
typedef struct Char {
	Ident ident;		/* feature bits */
	Bbx bx;
	Scoor csp;		/* space before character in Txtln (abs. coords) */
	long area;		/* no. pixels */
	long per;		/* perimeter (all bdys) */
	Scoor basl;		/* baseline (absolute coordinates, local copy) */
	/* next should be Blobl */
	int bmny;		/* no. blobs in Char */
	struct Blob *fi;	/* 1st in list (p.n ptrs) (bmny==0 -> fi==NULL) */
	Pval *sfv;		/* scalar-features (SFv) */
	Shapes sh;		/* set of shapes (size- & loc'n-invariant) */
	Bfeats *bfsp;		/* binary features */
	RanParms *rp;		/* randomizing parameters */
	Interpl il;		/* interpretations */
	char *l;		/* label (ASCII string in malloc space) */
	} Char;

#define Init_Char {IsChar,Init_Bbx,0,0L,0L,Scoor_MIN,0,NULL,NULL,Init_Shapes,NULL,NULL,Init_Interpl,NULL}
#if MAIN
Char empty_Char = Init_Char;
#else
extern Char empty_Char;
#endif

/* Char.ident flags: */
#define Char_spelled	00000000001	/* Its 1st Interp is in correct spelling */
#define Char_confused	00000000002	/* The classifier may have confused this */
#define Char_termhyp	00000000004	/* a terminal hyphen of its Word */
#define Char_omit	00000000010	/* can be omitted */
#define Char_label	00000000020	/* has an ASCII string label */
#define Char_ranparms	00000000040	/* has RanParms */
#define Char_split	00000000100	/* resulted from splitting a Char */
#define Char_merged	00000000200	/* resulted from merging Chars */

typedef struct Chars {
	int mny;	/* mny==0 -> cpa==NULL */
	Char **cpa;	/* pts to NULL-term'd array of ptrs (in malloc space) */
	} Chars;

#define Init_Chars {0,NULL}
#if MAIN
Chars empty_Chars = Init_Chars;
#else
extern Chars empty_Chars;
#endif

/* CharF - Char external file format */
typedef struct CharF {
	Ident ident;		/* feature bits: IsChar must be set */
	Bbx bx;
	Scoor csp;		/* space before character in Txtln (abs. coords) */
	long area;
	long per;
	short bmny;		/* no. blobs to follow */
	short imny;		/* no. interpretations (follows immediately) */
	short sfmny;		/* no. scalar features (follows immediately) */
	short shmny;		/* no. shape features (follows immediately) */
	short bfmny;		/* no. binary features (follows immediately) */
	/* if ident&Char_ranparms, RanParms follows CharF */
	/* if ident&Char_label, label follows CharF ('\0'-terminated string) */
	} CharF;

Char *alloc_char();		/* in Text.c */
Char *append_char();
Char *insert_char();
Char *insert_char_word();
Char *dup_char();
Char *dup_char_etc();
Chars *append_chars();
Chars *dup_chars_etc();

typedef struct Words {
	int mny;		/* mny==0 -> wpa==NULL */
	struct Word **wpa;	/* pts to NULL-term'd array of ptrs */
	} Words;

#define Init_Words {0,NULL}
#if MAIN
Words empty_Words = Init_Words;
#else
extern Words empty_Words;
#endif

/* Word - one or more Chars lying in a textline close together.
   wsp (word space) is always >=0 and is scaled by xheight (of Txtln) */
typedef struct Word {
	Ident ident;		/* feature bits */
	Bbx bx;
	float wsp;		/* space before word (multiple of wst*em) */
	Merit m;		/* Word merit (function of its Char's merits) */
	Prob p;			/* probability */
	Words ws;		/* set of alternative segmentations */
	Chars cs;
	Blobs bs;
	char *l;		/* label (ASCII string in malloc space) */
	int hash;		/* hash key for fast equality checking */
	} Word;

#define Init_Word {IsWord,Init_Bbx,0.0,0.0,0.0,Init_Words,Init_Chars,Init_Blobs,NULL,0}
#if MAIN
Word empty_Word = Init_Word;
#else
extern Word empty_Word;
#endif

/* Word-interpretation (as printable ASCII string).
   All string fields must point to malloc-space strings.
   */
typedef struct WordInterp {
	Ident ident;	/* identifies word type */
	char *s;	/* entire string = pp+by+po+ps */
	char *pp;	/* punctuation prefix */
	char *by;	/* body of word */
	char *po;	/* possessive ('s 'S) or negative (n't N'T) suffix */
	char *ps;	/* punctuation suffix */
	} WordInterp;

#define Init_WordInterp {IsWordInterp,NULL,NULL,NULL,NULL,NULL}
#if MAIN
WordInterp empty_WordInterp = Init_WordInterp;
#else
extern WordInterp empty_WordInterp;
#endif

WordInterp *dup_wordinterp_etc();

/* Word.ident & WordInterp.ident flags: */
#define Word_spelled	00000000001	/* by spells correctly */
#define Word_wf		00000000002	/* s is well-formed */
#define Word_ok		00000000004	/* s is ok ("acceptable") */
#define Word_numeric	00000000010	/* by is numeric */
#define Word_initcap	00000000020	/* by has initial capital */
#define Word_allcaps	00000000040	/* by is all caps */
#define Word_hyphens	00000000100	/* by has imbedded hyphens */
#define Word_slashes	00000000200	/* by has imbedded slashes */
#define Word_endsent	00000000400	/* end of sentence: ps has .!? */
#define Word_termhyp	00000001000	/* some interpretation ends with hyphen */
#define Word_label	00000002000	/* has an ASCII string label */
#define Word_allalp	00000004000	/* s is all alphabetic */
#define Word_bodalp	00000010000	/* by is all alphabetic */

/* WordF - Word external file format */
typedef struct WordF {
	Ident ident;		/* feature bits: IsWord must be set */
	Bbx bx;
	float wsp;		/* space before word (multiple of thr) */
	float m;		/* merit */
	short wmny;		/* no. Words (in Word.ws) to follow this Word */
	short cmny;		/* no. Chars to follow this Word */
	short bmny;		/* no. Blobs to follow this Word */
	/* if ident&Word_label, label follows WordF ('\0'-terminated string) */
	} WordF;

/* constant pitch model for a Txtln */
typedef struct ConstPitch {
	Ems w;		/* Character pitch */
	Scoor o;	/* origin (one of the character break points) */
	float r;	/* max/min autocorrelation ratio - the larger the better */
	} ConstPitch;

#define Init_ConstPitch {0.0,0,0.0}
#if MAIN
ConstPitch empty_ConstPitch = Init_ConstPitch;
#else
extern ConstPitch empty_ConstPitch;
#endif

typedef struct Txtlns {
	short mny;
	struct Txtln **lpa;	/* to array of Txtln's (if mny==0, lpa==NULL) */
	} Txtlns;

#define Init_Txtlns {0,NULL}
#if MAIN
Txtlns empty_Txtlns = Init_Txtlns;
#else
extern Txtlns empty_Txtlns;
#endif

/* Text Line */
#define Txtln_label	00000000004	/* has an ASCII string label */
#define Txtln_size	00000000002	/* dominant text size chosen */
#define Txtln_basl	00000000001	/* dominant baseline chosen */

typedef struct Txtln {
	Ident ident;
	Bbx bx;
	Pts size;	/* dominant text size (0 means unknown) */
	Scoor basl;	/* dominant baseline (absolute y coordinate) */
	short *proj;	/* ptr to malloc space projection array */
	ConstPitch *cp;	/* ptr to malloc space constant-pitch model */
	Merit m;	/* merit */
	Txtlns ls;	/* alternative Txtln segmentations */
	Words ws;	/* sorted asc. on x */
	Chars cs;	/* sorted asc. on bx.a.x */
	Blobs bs;	/* misc. non-char blobs */
	char *l;	/* label (ASCII string in malloc space) */
	} Txtln;

#define Init_Txtln {IsTxtln,Init_Bbx,0.0,0,NULL,NULL,0.0,Init_Txtlns,Init_Words,Init_Chars,Init_Blobs,NULL}
#if MAIN
Txtln empty_Txtln = Init_Txtln;
#else
extern Txtln empty_Txtln;
#endif

/* Text Line */
typedef struct TxtlnF {
	Ident ident;	/* IsTxtln must be set */
	Bbx bx;
	Scoor basl;	/* baseline (absolute y coordinate) */
	Pts size;	/* text size (<=0.0 means none is known) */
	short pmny;	/* no. shorts in projection array to follow */
	float m;	/* merit */
	short lmny;	/* no. alternative txtlns to follow */
	short wmny;	/* no. words to follow */
	short cmny;	/* no. chars to follow */
	int bmny;	/* no. blobs to follow */
	/* if ident&Txtln_label, label follows TxtlnF ('\0'-terminated string) */
	} TxtlnF;

/* blocks of text */
typedef struct Blocks {
	short mny;		/* if mny==0, bpa==NULL */
	struct Block **bpa;	/* to NULL-term'd array of ptrs */
	} Blocks;

#define Init_Blocks {0,NULL}
#if MAIN
Blocks empty_Blocks = Init_Blocks;
#else
extern Blocks empty_Blocks;
#endif

/* block of text */
typedef struct Block {
	Ident ident;
	Bbx bx;		/* bounding box of block */
	Radians skew;	/* skew angle (as correction to Page.skew) */
	Radians shear;	/* shear angle (as correction to Page.shear) */
	Ems wst;	/* word-space threshhold */
	Merit m;	/* merit */
	Blocks bks;	/* Blocks nested within this one */
	Txtlns ls;
	Words ws;
	Chars cs;
	Blobs bs;
	char *l;	/* label (ASCII string in malloc space) */
	} Block;

#define Init_Block {IsBlock,Init_Bbx,0.0,0.0,0.0,0.0,Init_Blocks,Init_Txtlns,Init_Words,Init_Chars,Init_Blobs,NULL}
#if MAIN
Block empty_Block = Init_Block;
#else
extern Block empty_Block;
#endif

/* Ident bits for Blocks */
#define Block_wst	00000000001	/* word-space-thresh set up */
#define Block_label	00000000002	/* has an ASCII string label */

#define Block_mb (1)

/* block of text */
typedef struct BlockF {
	Ident ident;	/* IsBlock must be set */
	Bbx bx;
	Ems wst;	/* word-space threshold */
	float skew;
	float shear;
#if Block_mb
	float m;
	short bkmny;
#endif
	short lmny;
	short wmny;
	int cmny;
	int bmny;
	/* if ident&Block_label, label follows BlockF ('\0'-terminated string) */
	} BlockF;

/* page */
typedef struct Page {
	Ident ident;
	Bbx bx;			/* extreme indices in pixels */
	short res_x;		/* resolution in pixels/inch:  x & y */
	short res_y;
	Radians skew;		/* skew angle */
	Radians shear;		/* shear correction */
	Blocks bks;		/* blocks */
	Txtlns ls;		/* textlines (those not in any block) */
	Words ws;		/* words (not in any textline) */
	Chars cs;		/* chars (not in any word) */
	Blobs bs;		/* blobs (not in any char) */
	char *l;		/* label (ASCII string in malloc space) */
	} Page;

#define Init_Page {IsPage,Init_Bbx,0,0,0.0,0.0,Init_Blocks,Init_Txtlns,Init_Words,Init_Chars,Init_Blobs,NULL}
#if MAIN
Page empty_Page = Init_Page;
#else
extern Page empty_Page;
#endif

#define Page_label	00000000001	/* has a label */

/* Pages of text */
typedef struct Pages {
	unsigned short mny;	/* if mny==0, pa==NULL */
	struct Page **pa;	/* to NULL-term'd array of ptrs */
	} Pages;

#define Init_Pages {0,NULL}
#if MAIN
Pages empty_Pages = Init_Pages;
#else
extern Pages empty_Pages;
#endif

typedef struct PageF {
	Ident ident;		/* IsPage bit must be set */
	short res_x,res_y;	/* resolution in pixels/inch:  x & y */
	Bbx bx;			/* extreme indices in pixels */
	float skew;		/* original page skew angle, radians */
	float shear;		/* original page shear angle, radians */
	short bkmny;		/* no. blocks */
	short lmny;		/* no. textlines (not in any block) */
	short wmny;		/* no. words (not in any textline) */
	int cmny;		/* no. chars (not in any word) */
	int bmny;		/* no. blobs (not in any char)) */
	/* if ident&Page_label, label follows PageF ('\0'-terminated string) */
	} PageF;

#define Page_new	00000000001	/* Page is in ``new'' format */

/* Each Dim-file begins with a Doc record */
typedef struct Doc {
	unsigned short version;		/* file format version number */
	Pages ps;
	char *l;		/* ASCII label */
	} Doc;

#define Init_Doc {0,Init_Pages,NULL}
#if MAIN
Doc empty_Doc = Init_Doc;
#else
extern Doc empty_Doc;
#endif

char *ident_toa();
Ident cto_ident();
Ident cto_flag();
char *merit_toa();
Page *alloc_page();
char *page_toa();
Page *dup_page();
Page *dup_page_etc();
Block *alloc_block();
char *block_toa();
Block *dup_block();
Block *dup_block_etc();
Block *append_block();
Blocks *dup_blocks_etc();
ConstPitch *alloc_constpitch();
char *constpitch_toa();
Txtln *alloc_txtln();
char *txtln_toa();
Word *alloc_word();
char *word_toa();
boolean eq_word();
int hash_word();
Char *alloc_char();
char *char_toa();
Pp *char_centroid();
Char *char_of_blob();
char *interp_toa();
char *blob_toa();
Pp *blob_centroid();
char *runf_toa();
char *runfs_toa();
char *pp_toa();
char *bdyedge_toa();
Blob *dup_blob();
Blob *dup_blob_etc();
Blob *dup_blobl_etc();
Blob *runs_to_runs();
Blobs *dup_blobs_etc();
Blobs *blobl_to_blobs();
Interp *alloc_interp();
Interp *dup_interp();
Interpl *dup_interpl_etc();
Interps *dup_interps_etc();
Word *append_word();
Word *insert_word();
Word *insert_word_txtln();
Word *dup_word();
Word *dup_word_etc();
Words *dup_words_etc();
Txtln *dup_txtln();
Txtln *dup_txtln_etc();
Txtlns *dup_txtlns_etc();
Block *dup_block();
Block *dup_block_etc();
Radians add_ang();
Radians subtract_ang();

/* in-line macroes */

/* Merge the `source' Bbx into the `destination' Bbx, expanding the dest Bbx
   as required.  The source Bbx is unmodified.  Usage:
	merge_bbx(s,d)
	    Bbx *s,*d;
*/
#define merge_bbx(s,d) { \
	if((s)->a.x < (d)->a.x) (d)->a.x = (s)->a.x; \
	if((s)->a.y < (d)->a.y) (d)->a.y = (s)->a.y; \
	if((s)->b.x > (d)->b.x) (d)->b.x = (s)->b.x; \
	if((s)->b.y > (d)->b.y) (d)->b.y = (s)->b.y; \
	}

/* Experimental implementation of a data structure for maintaining a set
   of distinct Words whose merit falls within a dynamically-varying range.
   This implementation is worst-case quadratic time.
   BUGS:  insert_wordset shouldn't duplicate the word.
   */

#define dbg_ws (0)	/* if !=0, enable WordSet debugging tracing */

typedef struct WordSet {
	double cut;	/* cut-fraction */
	int cap;	/* capacity: maximum number permitted at any time */
	double top;	/* maximum merit seen since allocation */
	Words ws;	/* sorted by top-choice merit */
	Word *max,*min;	/* maximum/minimum entries currently in ws */
	int high;	/* high-water:  max. no. entries in history */
	} WordSet;
#define Init_WordSet {1.0,INT_MAX,0.0,Init_Words,NULL,NULL,0}
#if MAIN
WordSet empty_WordSet = Init_WordSet;
#else
extern WordSet empty_WordSet;
#endif

#define size_wordset(s) ((s)->ws.mny)
#define top_wordset(s) ((s)->top)
#define max_wordset(s) ((s)->max)
#define min_wordset(s) ((s)->min)
#define max_wordmerit(s) ((max_wordset((s))!=NULL)? (max_wordset((s)))->m: 0.0)
#define min_wordmerit(s) ((min_wordset((s))!=NULL)? (min_wordset((s)))->m: 0.0)

Merit wordmerit();
WordSet *alloc_wordset();
boolean insert_wordset();
Word *remove_wordset();
int free_wordset_etc();
err_wordset();