You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
696 lines
24 KiB
696 lines
24 KiB
15 years ago
|
/* ------------------------------------------------------------------------
|
||
|
@NAME : string_util.c
|
||
|
@DESCRIPTION: Various string-processing utility functions:
|
||
|
bt_purify_string()
|
||
|
bt_change_case()
|
||
|
|
||
|
and their helpers:
|
||
|
foreign_letter()
|
||
|
purify_special_char()
|
||
|
@GLOBALS :
|
||
|
@CALLS :
|
||
|
@CALLERS :
|
||
|
@CREATED : 1997/10/19, Greg Ward
|
||
|
@MODIFIED : 1997/11/25, GPW: renamed from purify.c to string_util.c
|
||
|
added bt_change_case() and friends
|
||
|
@VERSION : $Id: string_util.c,v 1.10 1999/10/28 22:50:28 greg Rel $
|
||
|
-------------------------------------------------------------------------- */
|
||
|
|
||
|
#include <stdlib.h>
|
||
|
#include <ctype.h>
|
||
|
#include <string.h>
|
||
|
#include <assert.h>
|
||
|
#include "error.h"
|
||
|
#include "btparse.h"
|
||
|
#include "bt_debug.h"
|
||
|
|
||
|
|
||
|
/*
|
||
|
* These definitions should be fixed to be consistent with HTML
|
||
|
* entities, just for fun. And perhaps I should add entries for
|
||
|
* accented letters (at least those supported by TeX and HTML).
|
||
|
*/
|
||
|
typedef enum
{
   /*
    * The numeric values double as indices into the uc_version[] and
    * lc_version[] tables, so the order here must match those tables.
    */
   L_OTHER     = 0,                     /* not a "foreign" letter */

   L_OSLASH_L  = 1,                     /* slashed o: {\o} */
   L_OSLASH_U  = 2,

   L_LSLASH_L  = 3,                     /* slashed l: {\l} */
   L_LSLASH_U  = 4,

   L_OELIG_L   = 5,                     /* {\oe} ligature */
   L_OELIG_U   = 6,

   L_AELIG_L   = 7,                     /* {\ae} ligature */
   L_AELIG_U   = 8,

   L_SSHARP_L  = 9,                     /* German "sharp s": {\ss} */
   L_SSHARP_U  = 10,

   L_ACIRCLE_L = 11,                    /* a with ring: {\aa} */
   L_ACIRCLE_U = 12,

   L_INODOT_L  = 13,                    /* undotted i: {\i} */
   L_JNODOT_L  = 14                     /* undotted j: {\j} */
} bt_letter;
|
||
|
|
||
|
|
||
|
/*
 * Uppercase replacement text for each foreign letter.  Entries are
 * positional: slot N holds the text for the bt_letter whose value is N.
 */
static const char *uc_version[] =
{
   /*  0  L_OTHER     */  NULL,
   /*  1  L_OSLASH_L  */  "\\O",
   /*  2  L_OSLASH_U  */  "\\O",
   /*  3  L_LSLASH_L  */  "\\L",
   /*  4  L_LSLASH_U  */  "\\L",
   /*  5  L_OELIG_L   */  "\\OE",
   /*  6  L_OELIG_U   */  "\\OE",
   /*  7  L_AELIG_L   */  "\\AE",
   /*  8  L_AELIG_U   */  "\\AE",
   /*  9  L_SSHARP_L  */  "SS",         /* for LaTeX 2.09 */
   /* 10  L_SSHARP_U  */  "\\SS",
   /* 11  L_ACIRCLE_L */  "\\AA",
   /* 12  L_ACIRCLE_U */  "\\AA",
   /* 13  L_INODOT_L  */  "I",
   /* 14  L_JNODOT_L  */  "J"
};
|
||
|
|
||
|
/*
 * Lowercase replacement text for each foreign letter.  Entries are
 * positional: slot N holds the text for the bt_letter whose value is N.
 */
static const char *lc_version[] =
{
   /*  0  L_OTHER     */  NULL,
   /*  1  L_OSLASH_L  */  "\\o",
   /*  2  L_OSLASH_U  */  "\\o",
   /*  3  L_LSLASH_L  */  "\\l",
   /*  4  L_LSLASH_U  */  "\\l",
   /*  5  L_OELIG_L   */  "\\oe",
   /*  6  L_OELIG_U   */  "\\oe",
   /*  7  L_AELIG_L   */  "\\ae",
   /*  8  L_AELIG_U   */  "\\ae",
   /*  9  L_SSHARP_L  */  "\\ss",
   /* 10  L_SSHARP_U  */  "\\ss",
   /* 11  L_ACIRCLE_L */  "\\aa",
   /* 12  L_ACIRCLE_U */  "\\aa",
   /* 13  L_INODOT_L  */  "\\i",
   /* 14  L_JNODOT_L  */  "\\j"
};
|
||
|
|
||
|
|
||
|
|
||
|
/* ------------------------------------------------------------------------
|
||
|
@NAME : foreign_letter()
|
||
|
@INPUT : str
|
||
|
start
|
||
|
stop
|
||
|
@OUTPUT : letter
|
||
|
@RETURNS : TRUE if the string delimited by start and stop is a foreign
|
||
|
letter control sequence
|
||
|
@DESCRIPTION: Determines if a character sequence is one of (La)TeX's
|
||
|
"foreign letter" control sequences (l, o, ae, oe, aa, ss, plus
|
||
|
uppercase versions). If `letter' is non-NULL, returns which
|
||
|
letter was found in it (as a bt_letter value).
|
||
|
@CALLS :
|
||
|
@CALLERS : purify_special_char()
|
||
|
@CREATED : 1997/10/19, GPW
|
||
|
@MODIFIED :
|
||
|
-------------------------------------------------------------------------- */
|
||
|
static boolean
|
||
|
foreign_letter (char *str, int start, int stop, bt_letter * letter)
|
||
|
{
|
||
|
char c1, c2;
|
||
|
bt_letter dummy;
|
||
|
|
||
|
|
||
|
/*
|
||
|
* This is written for speed, not flexibility -- adding new foreign
|
||
|
* letters would be trying and vexatious.
|
||
|
*
|
||
|
* N.B. my gold standard list of foreign letters is Kopka and Daly's
|
||
|
* *A Guide to LaTeX 2e*, section 2.5.6.
|
||
|
*/
|
||
|
|
||
|
if (letter == NULL) /* so we can assign to *letter */
|
||
|
letter = &dummy; /* without compunctions */
|
||
|
*letter = L_OTHER; /* assume not a "foreign" letter */
|
||
|
|
||
|
c1 = str[start+0]; /* only two characters that we're */
|
||
|
c2 = str[start+1]; /* interested in */
|
||
|
|
||
|
switch (stop - start)
|
||
|
{
|
||
|
case 1: /* one-character control sequences */
|
||
|
switch (c1) /* (\o and \l) */
|
||
|
{
|
||
|
case 'o':
|
||
|
*letter = L_OSLASH_L; return TRUE;
|
||
|
case 'O':
|
||
|
*letter = L_OSLASH_U; return TRUE;
|
||
|
case 'l':
|
||
|
*letter = L_LSLASH_L; return TRUE;
|
||
|
case 'L':
|
||
|
*letter = L_LSLASH_L; return TRUE;
|
||
|
case 'i':
|
||
|
*letter = L_INODOT_L; return TRUE;
|
||
|
case 'j':
|
||
|
*letter = L_JNODOT_L; return TRUE;
|
||
|
default:
|
||
|
return FALSE;
|
||
|
}
|
||
|
break;
|
||
|
case 2: /* two character control sequences */
|
||
|
switch (c1) /* (\oe, \ae, \aa, and \ss) */
|
||
|
{
|
||
|
case 'o':
|
||
|
if (c2 == 'e') { *letter = L_OELIG_L; return TRUE; }
|
||
|
case 'O':
|
||
|
if (c2 == 'E') { *letter = L_OELIG_U; return TRUE; }
|
||
|
|
||
|
/* BibTeX 0.99 does not handle \aa and \AA -- but I do!*/
|
||
|
case 'a':
|
||
|
if (c2 == 'e')
|
||
|
{ *letter = L_AELIG_L; return TRUE; }
|
||
|
else if (c2 == 'a')
|
||
|
{ *letter = L_ACIRCLE_L; return TRUE; }
|
||
|
else
|
||
|
return FALSE;
|
||
|
case 'A':
|
||
|
if (c2 == 'E')
|
||
|
{ *letter = L_AELIG_U; return TRUE; }
|
||
|
else if (c2 == 'A')
|
||
|
{ *letter = L_ACIRCLE_U; return TRUE; }
|
||
|
else
|
||
|
return FALSE;
|
||
|
|
||
|
/* uppercase sharp-s -- new with LaTeX 2e (so far all I do
|
||
|
* is recognize it as a "foreign" letter)
|
||
|
*/
|
||
|
case 's':
|
||
|
if (c2 == 's')
|
||
|
{ *letter = L_SSHARP_L; return TRUE; }
|
||
|
else
|
||
|
return FALSE;
|
||
|
case 'S':
|
||
|
if (c2 == 'S')
|
||
|
{ *letter = L_SSHARP_U; return TRUE; }
|
||
|
else
|
||
|
return FALSE;
|
||
|
}
|
||
|
break;
|
||
|
default:
|
||
|
return FALSE;
|
||
|
} /* switch on length of control sequence */
|
||
|
|
||
|
internal_error ("foreign_letter(): should never reach end of function");
|
||
|
return FALSE; /* to keep gcc -Wall happy */
|
||
|
|
||
|
} /* foreign_letter */
|
||
|
|
||
|
|
||
|
/* ------------------------------------------------------------------------
@NAME       : purify_special_char()
@INPUT      : str        - the string being purified in place
              *src, *dst - indices into str for reading and writing
@OUTPUT     : *src - updated to index the closing brace of the special
                     char (or the terminating NUL if the braces are
                     unbalanced)
              *dst - updated to index the next available spot
                     for copying text to
@RETURNS    :
@DESCRIPTION: "Purifies" a BibTeX special character.  On input, str[*src]
              must be the opening brace of a special character and the
              character immediately following it must be a backslash
              (this is asserted).  *dst should index the next spot to
              copy into the output (purified) string.

              The opening brace and backslash are skipped.  If the
              control sequence is one of LaTeX's foreign letter sequences
              (as determined by foreign_letter()), its one or two letters
              are copied to the output, the second one lowercased.
              Otherwise the control sequence is skipped.  In either case,
              text after the control sequence is either copied
              (alphabetic characters) or skipped (anything else,
              including hyphens, ties, and digits) until the matching
              close brace is found.
@CALLS      : foreign_letter()
@CALLERS    : bt_purify_string()
@CREATED    : 1997/10/19, GPW
@MODIFIED   :
-------------------------------------------------------------------------- */
static void
purify_special_char (char *str, int * src, int * dst)
{
   int    depth;
   int    peek;

   assert (str[*src] == '{' && str[*src + 1] == '\\');
   depth = 1;

   *src += 2;                           /* jump to start of control sequence */
   peek = *src;                         /* scan to end of control sequence */

   /* <ctype.h> functions need an unsigned char value, not a plain char */
   while (isalpha ((unsigned char) str[peek]))
      peek++;

   /*
    * A single non-alphabetic character (eg. {\'e}) is a control sequence
    * of its own -- but never step over the terminating NUL, or the loop
    * below would run off the end of the string.
    */
   if (peek == *src && str[peek] != '\0')
      peek++;

   /* an empty control sequence (string ends after "{\") is not a letter */
   if (peek > *src && foreign_letter (str, *src, peek, NULL))
   {
      assert (peek - *src == 1 || peek - *src == 2);
      str[(*dst)++] = str[(*src)++];    /* copy first char */
      if (*src < peek)                  /* copy second char, downcasing */
         str[(*dst)++] = (char) tolower ((unsigned char) str[(*src)++]);
   }
   else                                 /* not a foreign letter -- skip */
   {                                    /* the control sequence entirely */
      *src = peek;
   }

   while (str[*src])
   {
      switch (str[*src])
      {
         case '{':
            depth++;
            (*src)++;
            break;
         case '}':
            depth--;
            if (depth == 0) return;     /* done; *src stays on the brace */
            (*src)++;
            break;
         default:
            if (isalpha ((unsigned char) str[*src]))
               str[(*dst)++] = str[(*src)++];   /* copy alphabetic chars */
            else
               (*src)++;                        /* skip everything else */
      }
   }

   /*
    * If we get here, we have unbalanced braces -- the '}' case should
    * always hit a depth == 0 point if braces are balanced.  No warning,
    * though, because a) BibTeX doesn't warn about purifying unbalanced
    * strings, and b) we (should have) already warned about it in the
    * lexer.
    */

}                                       /* purify_special_char() */
|
||
|
|
||
|
|
||
|
/* ------------------------------------------------------------------------
|
||
|
@NAME : bt_purify_string()
|
||
|
@INOUT : instr
|
||
|
@INPUT : options
|
||
|
@OUTPUT :
|
||
|
@RETURNS : instr - same as input string, but modified in place
|
||
|
@DESCRIPTION: "Purifies" a BibTeX string. This consists of copying
|
||
|
alphanumeric characters, converting hyphens and ties to
|
||
|
space, copying spaces, and skipping everything else. (Well,
|
||
|
almost -- special characters are handled specially, of
|
||
|
course. Basically, accented letters have the control
|
||
|
sequence skipped, while foreign letters have the control
|
||
|
sequence preserved in a reasonable manner. See
|
||
|
purify_special_char() for details.)
|
||
|
@CALLS : purify_special_char()
|
||
|
@CALLERS :
|
||
|
@CREATED : 1997/10/19, GPW
|
||
|
@MODIFIED :
|
||
|
-------------------------------------------------------------------------- */
|
||
|
void
|
||
|
bt_purify_string (char * string, ushort options)
|
||
|
{
|
||
|
int src, /* both indeces into string */
|
||
|
dst;
|
||
|
int depth; /* brace depth in string */
|
||
|
unsigned orig_len;
|
||
|
|
||
|
/*
|
||
|
* Since purification always copies or deletes chars, outstr will
|
||
|
* be no longer than string -- so nothing fancy is required to put
|
||
|
* an upper bound on its eventual size.
|
||
|
*/
|
||
|
|
||
|
depth = 0;
|
||
|
src = 0;
|
||
|
dst = 0;
|
||
|
orig_len = strlen (string);
|
||
|
|
||
|
DBG_ACTION (1, printf ("bt_purify_string(): input = %p (%s)\n",
|
||
|
string, string));
|
||
|
|
||
|
while (string[src] != (char) 0)
|
||
|
{
|
||
|
DBG_ACTION (2, printf (" next: >%c<: ", string[src]));
|
||
|
switch (string[src])
|
||
|
{
|
||
|
case '~': /* "separator" characters -- */
|
||
|
case '-': /* replaced with space */
|
||
|
case ' ': /* and copy an actual space */
|
||
|
string[dst++] = ' ';
|
||
|
src++;
|
||
|
DBG_ACTION (2, printf ("replacing with space"));
|
||
|
break;
|
||
|
case '{':
|
||
|
if (depth == 0 && string[src+1] == '\\')
|
||
|
{
|
||
|
DBG_ACTION (2, printf ("special char found"));
|
||
|
purify_special_char (string, &src, &dst);
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
DBG_ACTION (2, printf ("ordinary open brace"));
|
||
|
src++;
|
||
|
}
|
||
|
depth++;
|
||
|
break;
|
||
|
case '}':
|
||
|
DBG_ACTION (2, printf ("close brace"));
|
||
|
depth--;
|
||
|
src++;
|
||
|
break;
|
||
|
default:
|
||
|
if (isalnum (string[src])) /* any alphanumeric char -- */
|
||
|
{
|
||
|
DBG_ACTION (2, printf ("alphanumeric -- copying"));
|
||
|
string[dst++] = string[src++]; /* copy it */
|
||
|
}
|
||
|
else /* anything else -- skip it */
|
||
|
{
|
||
|
DBG_ACTION (2, printf ("non-separator, non-brace, non-alpha"));
|
||
|
src++;
|
||
|
}
|
||
|
} /* switch string[src] */
|
||
|
|
||
|
DBG_ACTION (2, printf ("\n"));
|
||
|
|
||
|
} /* while string[src] */
|
||
|
|
||
|
DBG_ACTION (1, printf ("bt_purify_string(): depth on exit: %d\n", depth));
|
||
|
|
||
|
string[dst] = (char) 0;
|
||
|
assert (strlen (string) <= orig_len);
|
||
|
} /* bt_purify_string() */
|
||
|
|
||
|
|
||
|
/* ======================================================================
|
||
|
* Case-transformation stuff
|
||
|
*/
|
||
|
|
||
|
|
||
|
/* ------------------------------------------------------------------------
|
||
|
@NAME : convert_special_char()
|
||
|
@INPUT : transform
|
||
|
@INOUT : string
|
||
|
src
|
||
|
dst
|
||
|
start_sentence
|
||
|
after_colon
|
||
|
@RETURNS :
|
||
|
@DESCRIPTION: Does case conversion on a special character.
|
||
|
@GLOBALS :
|
||
|
@CALLS :
|
||
|
@CALLERS :
|
||
|
@CREATED : 1997/11/25, GPW
|
||
|
@MODIFIED :
|
||
|
-------------------------------------------------------------------------- */
|
||
|
static void
|
||
|
convert_special_char (char transform,
|
||
|
char * string,
|
||
|
int * src,
|
||
|
int * dst,
|
||
|
boolean * start_sentence,
|
||
|
boolean * after_colon)
|
||
|
{
|
||
|
int depth;
|
||
|
boolean done_special;
|
||
|
int cs_end;
|
||
|
int cs_len; /* counting the backslash */
|
||
|
bt_letter letter;
|
||
|
const char * repl;
|
||
|
int repl_len;
|
||
|
|
||
|
#ifndef ALLOW_WARNINGS
|
||
|
repl = NULL; /* silence "might be used" */
|
||
|
/* uninitialized" warning */
|
||
|
#endif
|
||
|
|
||
|
/* First, copy just the opening brace */
|
||
|
string[(*dst)++] = string[(*src)++];
|
||
|
|
||
|
/*
|
||
|
* Now loop over characters inside the braces -- stop when we reach
|
||
|
* the matching close brace, or when the string ends.
|
||
|
*/
|
||
|
depth = 1; /* because we're in a special char */
|
||
|
done_special = FALSE;
|
||
|
|
||
|
while (string[*src] != 0 && !done_special)
|
||
|
{
|
||
|
switch (string[*src])
|
||
|
{
|
||
|
case '\\': /* a control sequence */
|
||
|
{
|
||
|
cs_end = *src+1; /* scan over chars of c.s. */
|
||
|
while (isalpha (string[cs_end]))
|
||
|
cs_end++;
|
||
|
|
||
|
/*
|
||
|
* OK, now *src points to the backslash (so src+*1 points to
|
||
|
* first char. of control sequence), and cs_end points to
|
||
|
* character immediately following end of control sequence.
|
||
|
* Thus we analyze [*src+1..cs_end] to determine if the control
|
||
|
* sequence is a foreign letter, and use (cs_end - (*src+1) + 1)
|
||
|
* = (cs_end - *src) as the length of the control sequence.
|
||
|
*/
|
||
|
|
||
|
cs_len = cs_end - *src; /* length of cs, counting backslash */
|
||
|
|
||
|
if (foreign_letter (string, *src+1, cs_end, &letter))
|
||
|
{
|
||
|
if (letter == L_OTHER)
|
||
|
internal_error ("impossible foreign letter");
|
||
|
|
||
|
switch (transform)
|
||
|
{
|
||
|
case 'u':
|
||
|
repl = uc_version[(int) letter];
|
||
|
break;
|
||
|
case 'l':
|
||
|
repl = lc_version[(int) letter];
|
||
|
break;
|
||
|
case 't':
|
||
|
if (*start_sentence || *after_colon)
|
||
|
{
|
||
|
repl = uc_version[(int) letter];
|
||
|
*start_sentence = *after_colon = FALSE;
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
repl = lc_version[(int) letter];
|
||
|
}
|
||
|
break;
|
||
|
default:
|
||
|
internal_error ("impossible case transform \"%c\"",
|
||
|
transform);
|
||
|
}
|
||
|
|
||
|
repl_len = strlen (repl);
|
||
|
if (repl_len > cs_len)
|
||
|
internal_error
|
||
|
("replacement text longer than original cs");
|
||
|
|
||
|
strncpy (string + *dst, repl, repl_len);
|
||
|
*src = cs_end;
|
||
|
*dst += repl_len;
|
||
|
} /* control sequence is a foreign letter */
|
||
|
else
|
||
|
{
|
||
|
/* not a foreign letter -- just copy the control seq. as is */
|
||
|
|
||
|
|
||
|
strncpy (string + *dst, string + *src, cs_end - *src);
|
||
|
*src += cs_len;
|
||
|
assert (*src == cs_end);
|
||
|
*dst += cs_len;
|
||
|
} /* control sequence not a foreign letter */
|
||
|
|
||
|
break;
|
||
|
} /* case: '\\' */
|
||
|
|
||
|
case '{':
|
||
|
{
|
||
|
string[(*dst)++] = string[(*src)++];
|
||
|
depth++;
|
||
|
break;
|
||
|
}
|
||
|
|
||
|
case '}':
|
||
|
{
|
||
|
string[(*dst)++] = string[(*src)++];
|
||
|
depth--;
|
||
|
if (depth == 0)
|
||
|
done_special = TRUE;
|
||
|
break;
|
||
|
}
|
||
|
|
||
|
default: /* any other character */
|
||
|
{
|
||
|
switch (transform)
|
||
|
{
|
||
|
/*
|
||
|
* Inside special chars, lowercase and title caps are same.
|
||
|
* (At least, that's bibtex's convention. I might change this
|
||
|
* at some point to be a bit smarter.)
|
||
|
*/
|
||
|
case 'l':
|
||
|
case 't':
|
||
|
string[(*dst)++] = tolower (string[(*src)++]);
|
||
|
break;
|
||
|
case 'u':
|
||
|
string[(*dst)++] = toupper (string[(*src)++]);
|
||
|
break;
|
||
|
default:
|
||
|
internal_error ("impossible case transform \"%c\"",
|
||
|
transform);
|
||
|
}
|
||
|
} /* default char */
|
||
|
|
||
|
} /* switch: current char */
|
||
|
|
||
|
} /* while: string or special char not done */
|
||
|
|
||
|
} /* convert_special_char() */
|
||
|
|
||
|
|
||
|
/* ------------------------------------------------------------------------
|
||
|
@NAME : bt_change_case()
|
||
|
@INPUT :
|
||
|
@OUTPUT :
|
||
|
@RETURNS :
|
||
|
@DESCRIPTION: Converts a string (in-place) to either uppercase, lowercase,
|
||
|
or "title capitalization">
|
||
|
@GLOBALS :
|
||
|
@CALLS :
|
||
|
@CALLERS :
|
||
|
@CREATED : 1997/11/25, GPW
|
||
|
@MODIFIED :
|
||
|
-------------------------------------------------------------------------- */
|
||
|
void
|
||
|
bt_change_case (char transform,
|
||
|
char * string,
|
||
|
ushort options)
|
||
|
{
|
||
|
int len;
|
||
|
int depth;
|
||
|
int src, dst; /* indeces into string */
|
||
|
boolean start_sentence;
|
||
|
boolean after_colon;
|
||
|
|
||
|
src = dst = 0;
|
||
|
len = strlen (string);
|
||
|
depth = 0;
|
||
|
|
||
|
start_sentence = TRUE;
|
||
|
after_colon = FALSE;
|
||
|
|
||
|
while (string[src] != 0)
|
||
|
{
|
||
|
switch (string[src])
|
||
|
{
|
||
|
case '{':
|
||
|
|
||
|
/*
|
||
|
* At start of special character? The entire special char.
|
||
|
* will be handled here, as follows:
|
||
|
* - text at any brace-depth within the s.c. is case-mangled;
|
||
|
* punctuation (sentence endings, colons) are ignored
|
||
|
* - control sequences are left alone, unless they are
|
||
|
* one of the "foreign letter" control sequences, in
|
||
|
* which case they're converted to the appropriate string
|
||
|
* according to the uc_version or lc_version tables.
|
||
|
*/
|
||
|
if (depth == 0 && string[src+1] == '\\')
|
||
|
{
|
||
|
convert_special_char (transform, string, &src, &dst,
|
||
|
&start_sentence, &after_colon);
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
* Otherwise, it's just something in braces. This is probably
|
||
|
* a proper noun or something encased in braces to protect it
|
||
|
* from case-mangling, so we do not case-mangle it. However,
|
||
|
* we *do* switch out of start_sentence or after_colon mode if
|
||
|
* we happen to be there (otherwise we'll do the wrong thing
|
||
|
* once we're out of the braces).
|
||
|
*/
|
||
|
else
|
||
|
{
|
||
|
string[dst++] = string[src++];
|
||
|
start_sentence = after_colon = FALSE;
|
||
|
depth++;
|
||
|
}
|
||
|
break;
|
||
|
|
||
|
case '}':
|
||
|
string[dst++] = string[src++];
|
||
|
depth--;
|
||
|
break;
|
||
|
|
||
|
/*
|
||
|
* Sentence-ending punctuation and colons are handled separately
|
||
|
* to allow for exact mimicing of BibTeX's behaviour. I happen
|
||
|
* to think that this behaviour (capitalize first word of sentences
|
||
|
* in a title) is better than BibTeX's, but I want to keep my
|
||
|
* options open for a future goal of perfect compatability.
|
||
|
*/
|
||
|
case '.':
|
||
|
case '?':
|
||
|
case '!':
|
||
|
start_sentence = TRUE;
|
||
|
string[dst++] = string[src++];
|
||
|
break;
|
||
|
|
||
|
case ':':
|
||
|
after_colon = TRUE;
|
||
|
string[dst++] = string[src++];
|
||
|
break;
|
||
|
|
||
|
default:
|
||
|
if (isspace (string[src]))
|
||
|
{
|
||
|
string[dst++] = string[src++];
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
if (depth == 0)
|
||
|
{
|
||
|
switch (transform)
|
||
|
{
|
||
|
case 'u':
|
||
|
string[dst++] = toupper (string[src++]);
|
||
|
break;
|
||
|
case 'l':
|
||
|
string[dst++] = tolower (string[src++]);
|
||
|
break;
|
||
|
case 't':
|
||
|
if (start_sentence || after_colon)
|
||
|
{
|
||
|
/*
|
||
|
* XXX BibTeX only preserves case of character
|
||
|
* immediately after a colon; I do two things
|
||
|
* differently: first, I pay attention to sentence
|
||
|
* punctuation, and second I force uppercase
|
||
|
* at start of sentence or after a colon.
|
||
|
*/
|
||
|
string[dst++] = toupper (string[src++]);
|
||
|
start_sentence = after_colon = FALSE;
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
string[dst++] = tolower (string[src++]);
|
||
|
}
|
||
|
break;
|
||
|
default:
|
||
|
internal_error ("impossible case transform \"%c\"",
|
||
|
transform);
|
||
|
}
|
||
|
} /* depth == 0 */
|
||
|
else
|
||
|
{
|
||
|
string[dst++] = string[src++];
|
||
|
}
|
||
|
} /* not blank */
|
||
|
} /* switch on current character */
|
||
|
|
||
|
} /* while not at end of string */
|
||
|
|
||
|
} /* bt_change_case */
|