home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
Usenet 1994 October
/
usenetsourcesnewsgroupsinfomagicoctober1994disk2.iso
/
misc
/
volume39
/
par131
/
part03
/
Par131
/
reformat.c
< prev
next >
Wrap
C/C++ Source or Header
|
1993-09-11
|
15KB
|
526 lines
/*********************/
/* reformat.c */
/* for Par 1.31 */
/* Copyright 1993 by */
/* Adam M. Costello */
/*********************/
/* This is ANSI C code. */
#include "reformat.h" /* Makes sure we're consistent with the */
/* prototype. Also includes "errmsg.h". */
#include "buffer.h" /* Also includes <stddef.h>. */
#include <stdio.h>
#include <stdlib.h>
#include <ctype.h>
#include <string.h>
#undef NULL
#define NULL ((void *) 0)
#ifdef DONTFREE
#define free(ptr)
#endif
typedef struct word {
const char *chrs; /* Pointer to the characters in the word */
/* (NOT terminated by '\0'). */
struct word *prev, /* Pointer to previous word. */
*next, /* Pointer to next word. */
/* Supposing this word were the first... */
*nextline; /* Pointer to first word in next line. */
int score, /* Value of the objective function. */
length; /* Length of this word. */
short flags; /* Notable properties of this word. */
} word;
/* The following may be bitwise-OR'd together */
/* to set the flags field of a word: */
const short W_SHIFTED = 1, /* This word should have an extra space before */
/* it unless it's the first word in the line. */
W_CURIOUS = 2, /* This is a curious word (see par.doc). */
W_CAPITAL = 4; /* This is a capitalized word (see par.doc). */
#define isshifted(w) ((w)->flags & 1)
#define iscurious(w) (((w)->flags & 2) >> 1)
#define iscapital(w) (((w)->flags & 4) >> 2)
const char * const impossibility =
"Impossibility #%d has occurred. Please report it.\n";
static int checkcapital(word *w)
/* Returns 1 if *w is capitalized according to the definition */
/* in par.doc (assuming <cap> is 0), or 0 if not. */
{
const char *p, *end;
for (p = w->chrs, end = p + w->length; p < end && !isalnum(*p); ++p);
return p < end && !islower(*p);
}
static int checkcurious(word *w)
/* Returns 1 if *w is curious according to */
/* the definition in par.doc, or 0 if not. */
{
const char *start, *p;
char ch;
for (start = w->chrs, p = start + w->length; p > start; --p) {
ch = p[-1];
if (isalnum(ch)) return 0;
if (ch == '.' || ch == '?' || ch == '!' || ch == ':') break;
}
if (p <= start + 1) return 0;
--p;
do if (isalnum(*--p)) return 1;
while (p > start);
return 0;
}
static int simplebreaks(word *head, word *tail, int L, int last)
/* Chooses line breaks in a list of words which maximize the length of the */
/* shortest line. L is the maximum line length. The last line counts as a */
/* line only if last is non-zero. _head must point to a dummy word, and tail */
/* must point to the last word, whose next field must be NULL. Returns the */
/* length of the shortest line on success, -1 if there is a word of length */
/* greater than L, or L if there are no lines. */
{
word *w1, *w2;
int linelen, score;
if (!head->next) return L;
for (w1 = tail, linelen = w1->length;
w1 != head && linelen <= L;
linelen += isshifted(w1), w1 = w1->prev, linelen += 1 + w1->length) {
w1->score = last ? linelen : L;
w1->nextline = NULL;
}
for ( ; w1 != head; w1 = w1->prev) {
w1->score = -1;
for (linelen = w1->length, w2 = w1->next;
linelen <= L;
linelen += 1 + isshifted(w2) + w2->length, w2 = w2->next) {
score = w2->score;
if (linelen < score) score = linelen;
if (score >= w1->score) {
w1->nextline = w2;
w1->score = score;
}
}
}
return head->next->score;
}
static void normalbreaks(word *head, word *tail, int L,
int fit, int last, errmsg_t errmsg)
/* Chooses line breaks in a list of words according to the policy */
/* in "par.doc" for <just> = 0 (L is <L>, fit is <fit>, and last is */
/* <last>). head must point to a dummy word, and tail must point */
/* to the last word, whose next field must be NULL. */
{
word *w1, *w2;
int tryL, shortest, score, target, linelen, extra, minlen;
*errmsg = '\0';
if (!head->next) return;
target = L;
/* Determine minimum possible difference between */
/* the lengths of the shortest and longest lines: */
if (fit) {
score = L + 1;
for (tryL = L; ; --tryL) {
shortest = simplebreaks(head,tail,tryL,last);
if (shortest < 0) break;
if (tryL - shortest < score) {
target = tryL;
score = target - shortest;
}
}
}
/* Determine maximum possible length of the shortest line: */
shortest = simplebreaks(head,tail,target,last);
if (shortest < 0) {
sprintf(errmsg,impossibility,1);
return;
}
/* Minimize the sum of the squares of the differences */
/* between target and the lengths of the lines: */
w1 = tail;
do {
w1->score = -1;
for (linelen = w1->length, w2 = w1->next;
linelen <= target;
linelen += 1 + isshifted(w2) + w2->length, w2 = w2->next) {
extra = target - linelen;
minlen = shortest;
if (w2)
score = w2->score;
else {
score = 0;
if (!last) extra = minlen = 0;
}
if (linelen >= minlen && score >= 0) {
score += extra * extra;
if (w1->score < 0 || score <= w1->score) {
w1->nextline = w2;
w1->score = score;
}
}
if (!w2) break;
}
w1 = w1->prev;
} while (w1 != head);
if (head->next->score < 0)
sprintf(errmsg,impossibility,2);
}
static void justbreaks(word *head, word *tail,
int L, int last, errmsg_t errmsg)
/* Chooses line breaks in a list of words according to the */
/* policy in "par.doc" for <just> = 1 (L is <L> and last is */
/* <last>). head must point to a dummy word, and tail must */
/* point to the last word, whose next field must be NULL. */
{
word *w1, *w2;
int numgaps, extra, score, gap, maxgap, numbiggaps;
*errmsg = '\0';
if (!head->next) return;
/* Determine the minimum possible largest inter-word gap: */
w1 = tail;
do {
w1->score = L;
for (numgaps = 0, extra = L - w1->length, w2 = w1->next;
extra >= 0;
++numgaps, extra -= 1 + isshifted(w2) + w2->length, w2 = w2->next) {
gap = numgaps ? (extra + numgaps - 1) / numgaps : L;
if (w2)
score = w2->score;
else {
score = 0;
if (!last) gap = 0;
}
if (gap > score) score = gap;
if (score < w1->score) {
w1->nextline = w2;
w1->score = score;
}
if (!w2) break;
}
w1 = w1->prev;
} while (w1 != head);
maxgap = head->next->score;
if (maxgap >= L) {
strcpy(errmsg, "Cannot justify.\n");
return;
}
/* Minimize the sum of the squares of the numbers */
/* of extra spaces required in each inter-word gap: */
w1 = tail;
do {
w1->score = -1;
for (numgaps = 0, extra = L - w1->length, w2 = w1->next;
extra >= 0;
++numgaps, extra -= 1 + isshifted(w2) + w2->length, w2 = w2->next) {
gap = numgaps ? (extra + numgaps - 1) / numgaps : L;
if (w2)
score = w2->score;
else {
if (!last) {
w1->nextline = NULL;
w1->score = 0;
break;
}
score = 0;
}
if (gap <= maxgap && score >= 0) {
numbiggaps = extra % numgaps;
score += (extra / numgaps) * (extra + numbiggaps) + numbiggaps;
/* The above may not look like the sum of the squares of the numbers */
/* of extra spaces required in each inter-word gap, but trust me, it */
/* is. It's easier to prove graphically than algebraicly. */
if (w1->score < 0 || score <= w1->score) {
w1->nextline = w2;
w1->score = score;
}
}
if (!w2) break;
}
w1 = w1->prev;
} while (w1 != head);
if (head->next->score < 0)
sprintf(errmsg,impossibility,3);
}
char **reformat(
const char * const *inlines, const char * const *endline,
int hang, int prefix, int suffix, int width, int cap, int fit,
int guess, int just, int last, int Report, int touch, errmsg_t errmsg
)
{
int numin, affix, L, onfirstword = 1, linelen, numout, numgaps, extra, phase;
const char * const *line, **suffixes = NULL, **suf, *end, *p1, *p2;
char *q1, *q2, **outlines = NULL;
word dummy, *head, *tail, *w1, *w2;
buffer *pbuf = NULL;
/* Initialization: */
*errmsg = '\0';
dummy.next = dummy.prev = NULL;
dummy.flags = 0;
head = tail = &dummy;
numin = endline - inlines;
/* Allocate space for pointers to the suffixes: */
if (numin) {
suffixes = malloc(numin * sizeof (const char *));
if (!suffixes) {
strcpy(errmsg,outofmem);
goto rfcleanup;
}
}
/* Set the pointers to the suffixes, and create the words: */
affix = prefix + suffix;
L = width - prefix - suffix;
for (line = inlines, suf = suffixes; line < endline; ++line, ++suf) {
for (end = *line; *end; ++end);
if (end - *line < affix) {
sprintf(errmsg,
"Line %d shorter than <prefix> + <suffix> = %d + %d = %d\n",
line - inlines + 1, prefix, suffix, affix);
goto rfcleanup;
}
end -= suffix;
*suf = end;
p1 = *line + prefix;
for (;;) {
while (p1 < end && *p1 == ' ') ++p1;
if (p1 == end) break;
p2 = p1;
if (onfirstword) {
p1 = *line + prefix;
onfirstword = 0;
}
while (p2 < end && *p2 != ' ') ++p2;
w1 = malloc(sizeof (word));
if (!w1) {
strcpy(errmsg,outofmem);
goto rfcleanup;
}
w1->next = NULL;
w1->prev = tail;
tail = tail->next = w1;
w1->chrs = p1;
w1->length = p2 - p1;
w1->flags = 0;
p1 = p2;
}
}
/* If guess is 1, set flag values and merge words: */
if (guess) {
for (w1 = head, w2 = head->next; w2; w1 = w2, w2 = w2->next) {
if (checkcurious(w2)) w2->flags |= W_CURIOUS;
if (cap || checkcapital(w2)) {
w2->flags |= W_CAPITAL;
if (iscurious(w1))
if (w1->chrs[w1->length] && w1->chrs + w1->length + 1 == w2->chrs) {
w2->length += w1->length + 1;
w2->chrs = w1->chrs;
w2->prev = w1->prev;
w2->prev->next = w2;
if (iscapital(w1)) w2->flags |= W_CAPITAL;
else w2->flags &= ~W_CAPITAL;
if (isshifted(w1)) w2->flags |= W_SHIFTED;
else w2->flags &= ~W_SHIFTED;
free(w1);
}
else
w2->flags |= W_SHIFTED;
}
}
tail = w1;
}
/* Check for too-long words: */
if (Report)
for (w2 = head->next; w2; w2 = w2->next) {
if (w2->length > L) {
linelen = w2->length;
if (linelen > errmsg_size - 17)
linelen = errmsg_size - 17;
sprintf(errmsg, "Word too long: %.*s\n", linelen, w2->chrs);
goto rfcleanup;
}
}
else
for (w2 = head->next; w2; w2 = w2->next)
while (w2->length > L) {
w1 = malloc(sizeof (word));
if (!w1) {
strcpy(errmsg,outofmem);
goto rfcleanup;
}
w1->next = w2;
w1->prev = w2->prev;
w1->prev->next = w1;
w2->prev = w1;
w1->chrs = w2->chrs;
w2->chrs += L;
w1->length = L;
w2->length -= L;
w1->flags = 0;
if (iscapital(w2)) {
w1->flags |= W_CAPITAL;
w2->flags &= ~W_CAPITAL;
}
if (isshifted(w2)) {
w1->flags |= W_SHIFTED;
w2->flags &= ~W_SHIFTED;
}
}
/* Choose line breaks according to policy in "par.doc": */
if (just) justbreaks(head,tail,L,last,errmsg);
else normalbreaks(head,tail,L,fit,last,errmsg);
if (*errmsg) goto rfcleanup;
/* Change L to the length of the longest line if required: */
if (!just && touch) {
L = 0;
w1 = head->next;
while (w1) {
for (linelen = w1->length, w2 = w1->next;
w2 != w1->nextline;
linelen += 1 + isshifted(w2) + w2->length, w2 = w2->next);
if (linelen > L) L = linelen;
w1 = w2;
}
}
/* Construct the lines: */
pbuf = newbuffer(sizeof (char *), errmsg);
if (*errmsg) goto rfcleanup;
numout = 0;
w1 = head->next;
while (numout < hang || w1) {
if (w1)
for (w2 = w1->next, numgaps = 0, extra = L - w1->length;
w2 != w1->nextline;
++numgaps, extra -= 1 + isshifted(w2) + w2->length, w2 = w2->next);
linelen = suffix || just && (w2 || last) ?
L + affix :
w1 ? prefix + L - extra : prefix;
q1 = malloc((linelen + 1) * sizeof (char));
if (!q1) {
strcpy(errmsg,outofmem);
goto rfcleanup;
}
additem(pbuf, &q1, errmsg);
if (*errmsg) goto rfcleanup;
++numout;
q2 = q1 + prefix;
if (numout <= numin) memcpy(q1, inlines[numout - 1], prefix);
else if (numin > hang) memcpy(q1, inlines[numin - 1], prefix);
else while (q1 < q2) *q1++ = ' ';
q1 = q2;
if (w1) {
phase = numgaps / 2;
for (w2 = w1; ; ) {
memcpy(q1, w2->chrs, w2->length);
q1 += w2->length;
w2 = w2->next;
if (w2 == w1->nextline) break;
*q1++ = ' ';
if (just && (w1->nextline || last)) {
phase += extra;
while (phase >= numgaps) {
*q1++ = ' ';
phase -= numgaps;
}
}
if (isshifted(w2)) *q1++ = ' ';
}
}
q2 += linelen - affix;
while (q1 < q2) *q1++ = ' ';
q2 = q1 + suffix;
if (numout <= numin) memcpy(q1, suffixes[numout - 1], suffix);
else if (numin) memcpy(q1, suffixes[numin - 1], suffix);
else while(q1 < q2) *q1++ = ' ';
*q2 = '\0';
if (w1) w1 = w1->nextline;
}
q1 = NULL;
additem(pbuf, &q1, errmsg);
if (*errmsg) goto rfcleanup;
outlines = copyitems(pbuf,errmsg);
rfcleanup:
if (suffixes) free(suffixes);
while (tail != head) {
tail = tail->prev;
free(tail->next);
}
if (pbuf) {
if (!outlines)
for (;;) {
outlines = nextitem(pbuf);
if (!outlines) break;
free(*outlines);
}
freebuffer(pbuf);
}
return outlines;
}