tellico/src/translators/btparse/string_util.c

/* ------------------------------------------------------------------------
@NAME       : string_util.c
@DESCRIPTION: Various string-processing utility functions:
                bt_purify_string()
                bt_change_case()

              and their helpers:
                foreign_letter()
                purify_special_char()
@GLOBALS    : 
@CALLS      : 
@CALLERS    : 
@CREATED    : 1997/10/19, Greg Ward
@MODIFIED   : 1997/11/25, GPW: renamed to from purify.c to string_util.c
                               added bt_change_case() and friends
@VERSION    : $Id: string_util.c,v 1.10 1999/10/28 22:50:28 greg Rel $
-------------------------------------------------------------------------- */

#include <stdlib.h>
#include <ctype.h>
#include <string.h>
#include <assert.h>
#include "error.h"
#include "btparse.h"
#include "bt_debug.h"


/* 
 * These definitions should be fixed to be consistent with HTML 
 * entities, just for fun.  And perhaps I should add entries for
 * accented letters (at least those supported by TeX and HTML).
 */
typedef enum
{
   L_OTHER,                             /* not a "foreign" letter */
   L_OSLASH_L,                          /* Eastern European {\o} */
   L_OSLASH_U,
   L_LSLASH_L,                          /* {\l} */
   L_LSLASH_U,
   L_OELIG_L,                           /* Latin {\oe} ligature */
   L_OELIG_U,
   L_AELIG_L,                           /* {\ae} ligature */
   L_AELIG_U,
   L_SSHARP_L,                          /* German "sharp s" {\ss} */
   L_SSHARP_U,
   L_ACIRCLE_L,                         /* Nordic {\aa} */
   L_ACIRCLE_U,
   L_INODOT_L,                          /* undotted i: {\i} */
   L_JNODOT_L                           /* {\j} */
} bt_letter;


/*
 * Uppercase replacement text for each foreign letter, indexed by
 * bt_letter value (entries must therefore stay in bt_letter order).
 * convert_special_char() uses this table for the 'u' transform, and for
 * the 't' transform at the start of a sentence or after a colon.  The
 * L_OTHER slot is NULL because it has no replacement.
 */
static const char * uc_version[] = 
{
   NULL,                                /* L_OTHER */
   "\\O",                               /* L_OSLASH_L */
   "\\O",                               /* L_OSLASH_U */
   "\\L",                               /* L_LSLASH_L */
   "\\L",                               /* L_LSLASH_U */
   "\\OE",                              /* L_OELIG_L */
   "\\OE",                              /* L_OELIG_U */
   "\\AE",                              /* L_AELIG_L */
   "\\AE",                              /* L_AELIG_U */
   "SS",                                /* L_SSHARP_L -- for LaTeX 2.09 */
   "\\SS",                              /* L_SSHARP_U */
   "\\AA",                              /* L_ACIRCLE_L */
   "\\AA",                              /* L_ACIRCLE_U */
   "I",                                 /* L_INODOT_L */
   "J"                                  /* L_JNODOT_L */
};

/*
 * Lowercase replacement text for each foreign letter, indexed by
 * bt_letter value (entries must therefore stay in bt_letter order).
 * convert_special_char() uses this table for the 'l' transform, and for
 * the 't' transform when not at the start of a sentence or after a
 * colon.  The L_OTHER slot is NULL because it has no replacement.
 */
static const char * lc_version[] = 
{
   NULL,                                /* L_OTHER */
   "\\o",                               /* L_OSLASH_L */
   "\\o",                               /* L_OSLASH_U */
   "\\l",                               /* L_LSLASH_L */
   "\\l",                               /* L_LSLASH_U */
   "\\oe",                              /* L_OELIG_L */
   "\\oe",                              /* L_OELIG_U */
   "\\ae",                              /* L_AELIG_L */
   "\\ae",                              /* L_AELIG_U */
   "\\ss",                              /* L_SSHARP_L */
   "\\ss",                              /* L_SSHARP_U */
   "\\aa",                              /* L_ACIRCLE_L */
   "\\aa",                              /* L_ACIRCLE_U */
   "\\i",                               /* L_INODOT_L */
   "\\j"                                /* L_JNODOT_L */
};


/* ------------------------------------------------------------------------
@NAME       : foreign_letter()
@INPUT      : str   - string containing the control sequence
              start - index of the first character of the control
                      sequence name (ie. just past the backslash)
              stop  - index one past the last character of the name
@OUTPUT     : letter - if non-NULL, receives the bt_letter value for the
                       control sequence (L_OTHER if it is not a foreign
                       letter)
@RETURNS    : TRUE if the string delimited by start and stop is a foreign
              letter control sequence, FALSE otherwise
@DESCRIPTION: Determines if a character sequence is one of (La)TeX's
              "foreign letter" control sequences (l, o, i, j, ae, oe, aa,
              ss, plus uppercase versions where they exist).  The match
              is exact and case-sensitive: mixed-case names such as "Oe"
              or near-misses such as "oa" are not foreign letters.

              Only str[start] (and, for two-character names, 
              str[start+1]) are examined; nothing is read when the
              name is not one or two characters long.
@CALLS      : 
@CALLERS    : purify_special_char()
              convert_special_char()
@CREATED    : 1997/10/19, GPW
@MODIFIED   : 
-------------------------------------------------------------------------- */
static boolean
foreign_letter (char *str, int start, int stop, bt_letter * letter)
{
   bt_letter found = L_OTHER;           /* assume not a "foreign" letter */
   char      c1, c2;

   /* 
    * This is written for speed, not flexibility -- adding new foreign
    * letters would be trying and vexatious.
    * 
    * N.B. my gold standard list of foreign letters is Kopka and Daly's
    * *A Guide to LaTeX 2e*, section 2.5.6.
    *
    * Every inner case ends in an explicit break so that an unmatched
    * second character can never leak into the next case, and every
    * unrecognized name simply leaves `found' at L_OTHER.
    */

   switch (stop - start)
   {
      case 1:                           /* one-character control sequences */
         c1 = str[start];               /* (\o, \l, \i, \j) */
         switch (c1)
         {
            case 'o': found = L_OSLASH_L; break;
            case 'O': found = L_OSLASH_U; break;
            case 'l': found = L_LSLASH_L; break;
            case 'L': found = L_LSLASH_U; break;
            case 'i': found = L_INODOT_L; break;
            case 'j': found = L_JNODOT_L; break;
            default:  break;
         }
         break;

      case 2:                           /* two character control sequences */
         c1 = str[start];               /* (\oe, \ae, \aa, and \ss) */
         c2 = str[start+1];
         switch (c1)
         {
            case 'o':
               if (c2 == 'e') found = L_OELIG_L;
               break;
            case 'O':
               if (c2 == 'E') found = L_OELIG_U;
               break;

            /* BibTeX 0.99 does not handle \aa and \AA -- but I do!*/
            case 'a':
               if (c2 == 'e')      found = L_AELIG_L;
               else if (c2 == 'a') found = L_ACIRCLE_L;
               break;
            case 'A':
               if (c2 == 'E')      found = L_AELIG_U;
               else if (c2 == 'A') found = L_ACIRCLE_U;
               break;

            /* uppercase sharp-s -- new with LaTeX 2e (so far all I do
             * is recognize it as a "foreign" letter)
             */
            case 's':
               if (c2 == 's') found = L_SSHARP_L;
               break;
            case 'S':
               if (c2 == 'S') found = L_SSHARP_U;
               break;

            default:                    /* ordinary two-letter c.s. */
               break;                   /* (eg. \it, \bf) */
         }
         break;

      default:                          /* wrong length for any foreign */
         break;                         /* letter */
   } /* switch on length of control sequence */

   if (letter != NULL)
      *letter = found;

   return (found != L_OTHER) ? TRUE : FALSE;

} /* foreign_letter */


/* ------------------------------------------------------------------------
@NAME       : purify_special_char()
@INPUT      : str        - the string being purified in place
              *src, *dst - indices into str for reading and writing
@OUTPUT     : *src       - updated to index the closing brace of the 
                           special char (or the terminating NUL if the
                           braces are unbalanced)
              *dst       - updated to index the next available spot
                           for copying text to
@RETURNS    : 
@DESCRIPTION: "Purifies" a BibTeX special character.  On input, *src should
              index the opening brace of a special character (ie. the
              brace must be at depth 0 of the whole string, and the
              character immediately following it must be a backslash).
              *dst should index the next spot to copy into the output
              (purified) string.  purify_special_char() will skip over the
              opening brace and backslash; if the control sequence is one
              of LaTeX's foreign letter sequences (as determined by
              foreign_letter()), then its name is copied to *dst (with
              the second character, if any, downcased).  Otherwise the
              control sequence is skipped.  In either case, text after
              the control sequence is either copied (alphabetic
              characters) or skipped (anything else, including hyphens,
              ties, and digits).
@CALLS      : foreign_letter()
@CALLERS    : bt_purify_string()
@CREATED    : 1997/10/19, GPW
@MODIFIED   : 
-------------------------------------------------------------------------- */
static void
purify_special_char (char *str, int * src, int * dst)
{
   int    depth;
   int    peek;

   assert (str[*src] == '{' && str[*src + 1] == '\\');
   depth = 1;

   *src += 2;                           /* jump to start of control sequence */
   peek = *src;                         /* scan to end of control sequence */

   /* <ctype.h> functions need an unsigned char value, hence the casts */
   while (isalpha ((unsigned char) str[peek]))
      peek++;

   /* 
    * A single non-alpha character (eg. {\'e}) is a control sequence too
    * -- but never step over the terminating NUL, or everything below
    * would read past the end of the string.
    */
   if (peek == *src && str[peek] != '\0')
      peek++;

   if (peek > *src && foreign_letter (str, *src, peek, NULL))
   {
      assert (peek - *src == 1 || peek - *src == 2);
      str[(*dst)++] = str[(*src)++];    /* copy first char */
      if (*src < peek)                  /* copy second char, downcasing */
         str[(*dst)++] = (char) tolower ((unsigned char) str[(*src)++]);
   }
   else                                 /* not a foreign letter -- skip */
   {                                    /* the control sequence entirely */
      *src = peek;
   }

   while (str[*src])
   {
      switch (str[*src])
      {
         case '{':
            depth++;
            (*src)++;
            break;
         case '}':
            depth--;
            if (depth == 0) return;     /* done with special char */
            (*src)++;
            break;
         default:
            if (isalpha ((unsigned char) str[*src]))
               str[(*dst)++] = str[(*src)++];   /* copy alphabetic chars */
            else                        /* skip everything else */
               (*src)++;
      }
   }

   /* 
    * If we get here, we have unbalanced braces -- the '}' case should
    * always hit a depth == 0 point if braces are balanced.  No warning,
    * though, because a) BibTeX doesn't warn about purifying unbalanced
    * strings, and b) we (should have) already warned about it in the
    * lexer.
    */

} /* purify_special_char() */


/* ------------------------------------------------------------------------
@NAME       : bt_purify_string()
@INOUT      : string  - NUL-terminated string, purified in place
@INPUT      : options - not examined by this function
@OUTPUT     : 
@RETURNS    : 
@DESCRIPTION: "Purifies" a BibTeX string.  This consists of copying
              alphanumeric characters, converting hyphens and ties to
              space, copying spaces, and skipping everything else.  (Well,
              almost -- special characters are handled specially, of
              course.  Basically, accented letters have the control
              sequence skipped, while foreign letters have the control
              sequence preserved in a reasonable manner.  See
              purify_special_char() for details.)

              The result is never longer than the input, so it is
              written back over the input string.
@CALLS      : purify_special_char()
@CALLERS    : 
@CREATED    : 1997/10/19, GPW
@MODIFIED   : 
-------------------------------------------------------------------------- */
void
bt_purify_string (char * string, ushort options)
{
   int    src,                          /* both indices into string */
          dst;
   int    depth;                        /* brace depth in string */
   size_t orig_len;                     /* for the sanity check on exit */

   /* 
    * Since purification always copies or deletes chars, the output will
    * be no longer than string -- so nothing fancy is required to put
    * an upper bound on its eventual size.
    */

   depth = 0;
   src = 0;
   dst = 0;
   orig_len = strlen (string);

   DBG_ACTION (1, printf ("bt_purify_string(): input = %p (%s)\n", 
                          string, string));

   while (string[src] != (char) 0)
   {
      DBG_ACTION (2, printf ("  next: >%c<: ", string[src]));
      switch (string[src])
      {
         case '~':                      /* "separator" characters -- */
         case '-':                      /* replaced with space */
         case ' ':                      /* and copy an actual space */
            string[dst++] = ' ';
            src++;
            DBG_ACTION (2, printf ("replacing with space"));
            break;
         case '{':
            if (depth == 0 && string[src+1] == '\\')
            {
               /* 
                * Leaves src on the special char's closing brace, which
                * the '}' case below then consumes (undoing the depth++
                * that follows).
                */
               DBG_ACTION (2, printf ("special char found"));
               purify_special_char (string, &src, &dst);
            }
            else
            {
               DBG_ACTION (2, printf ("ordinary open brace"));
               src++;
            }
            depth++;
            break;
         case '}':
            DBG_ACTION (2, printf ("close brace"));
            depth--;
            src++;
            break;
         default:
            /* <ctype.h> functions need an unsigned char value */
            if (isalnum ((unsigned char) string[src]))
            {                           /* any alphanumeric char -- */
               DBG_ACTION (2, printf ("alphanumeric -- copying"));
               string[dst++] = string[src++]; /* copy it */
            }
            else                        /* anything else -- skip it */
            {
               DBG_ACTION (2, printf ("non-separator, non-brace, non-alpha"));
               src++;
            }
      } /* switch string[src] */

      DBG_ACTION (2, printf ("\n"));

   } /* while string[src] */

   DBG_ACTION (1, printf ("bt_purify_string(): depth on exit: %d\n", depth));

   string[dst] = (char) 0;
   assert (strlen (string) <= orig_len);
} /* bt_purify_string() */


/* ======================================================================
 * Case-transformation stuff
 */


/* ------------------------------------------------------------------------
@NAME       : convert_special_char()
@INPUT      : transform - 'u' (uppercase), 'l' (lowercase) or 't' (title)
@INOUT      : string         - the string being converted in place
              src            - on entry, *src indexes the opening brace
                               of the special char; on exit, the first
                               character after its closing brace (or the
                               terminating NUL if braces are unbalanced)
              dst            - *dst is the next index to write to; 
                               advanced past everything written
              start_sentence
              after_colon    - under 't', if either flag is set when a
                               foreign letter is met, the letter is 
                               uppercased and both flags are cleared
@RETURNS    : 
@DESCRIPTION: Does case conversion on a special character.  Braces and
              ordinary control sequences are copied unchanged; foreign
              letter control sequences are replaced by their entry in
              uc_version[] or lc_version[]; all other characters are
              run through toupper() ('u') or tolower() ('l' and 't').
              The output is never longer than the input, so *dst never
              overtakes *src.
@GLOBALS    : uc_version, lc_version
@CALLS      : foreign_letter()
@CALLERS    : bt_change_case()
@CREATED    : 1997/11/25, GPW
@MODIFIED   : 
-------------------------------------------------------------------------- */
static void
convert_special_char (char transform, 
                      char * string,
                      int * src,
                      int * dst, 
                      boolean * start_sentence,
                      boolean * after_colon)
{
   int       depth;
   boolean   done_special;
   int       cs_end;
   int       cs_len;                    /* counting the backslash */
   bt_letter letter;
   const char *    repl = NULL;
   int       repl_len;

   /* First, copy just the opening brace */
   string[(*dst)++] = string[(*src)++];

   /* 
    * Now loop over characters inside the braces -- stop when we reach
    * the matching close brace, or when the string ends.
    */
   depth = 1;                           /* because we're in a special char */
   done_special = FALSE;

   while (string[*src] != 0 && !done_special)
   {
      switch (string[*src])
      {
         case '\\':                     /* a control sequence */
         {
            cs_end = *src+1;            /* scan over chars of c.s. */
            while (isalpha ((unsigned char) string[cs_end])) 
               cs_end++;

            /* 
             * OK, now *src points to the backslash (so *src+1 points to
             * first char. of control sequence), and cs_end points to
             * character immediately following end of control sequence.
             * Thus we analyze [*src+1..cs_end) to determine if the
             * control sequence is a foreign letter, and use 
             * (cs_end - *src) as the length of the control sequence
             * including its backslash.
             */

            cs_len = cs_end - *src;     /* length of cs, counting backslash */

            if (foreign_letter (string, *src+1, cs_end, &letter))
            {
               if (letter == L_OTHER)
                  internal_error ("impossible foreign letter");

               switch (transform)
               {
                  case 'u':
                     repl = uc_version[(int) letter];
                     break;
                  case 'l':
                     repl = lc_version[(int) letter];
                     break;
                  case 't':
                     if (*start_sentence || *after_colon)
                     {
                        repl = uc_version[(int) letter];
                        *start_sentence = *after_colon = FALSE;
                     }
                     else
                     {
                        repl = lc_version[(int) letter];
                     }
                     break;
                  default:
                     internal_error ("impossible case transform \"%c\"",
                                     transform);
               }

               repl_len = (int) strlen (repl);
               if (repl_len > cs_len)
                  internal_error
                     ("replacement text longer than original cs");

               /* repl lives in a static table, so it cannot overlap */
               memcpy (string + *dst, repl, (size_t) repl_len);
               *src = cs_end;
               *dst += repl_len;
            } /* control sequence is a foreign letter */
            else
            {
               /* 
                * Not a foreign letter -- just copy the control seq. as
                * is.  Source and destination are both inside `string'
                * and may overlap (or coincide), so this must be
                * memmove(), not strncpy()/memcpy().
                */
               memmove (string + *dst, string + *src, (size_t) cs_len);
               *src += cs_len;
               assert (*src == cs_end);
               *dst += cs_len;
            } /* control sequence not a foreign letter */

            break;
         } /* case: '\\' */

         case '{':
         {
            string[(*dst)++] = string[(*src)++];
            depth++;
            break;
         }

         case '}':
         {
            string[(*dst)++] = string[(*src)++];
            depth--;
            if (depth == 0)
               done_special = TRUE;
            break;
         }

         default:                       /* any other character */
         {
            switch (transform)
            {
               /* 
                * Inside special chars, lowercase and title caps are same.
                * (At least, that's bibtex's convention.  I might change this
                * at some point to be a bit smarter.)
                */
               case 'l':
               case 't':
                  string[(*dst)++] = 
                     (char) tolower ((unsigned char) string[(*src)++]);
                  break;
               case 'u':
                  string[(*dst)++] = 
                     (char) toupper ((unsigned char) string[(*src)++]);
                  break;
               default:
                  internal_error ("impossible case transform \"%c\"",
                                  transform);
            }
         } /* default char */

      } /* switch: current char */

   } /* while: string or special char not done */

} /* convert_special_char() */


/* ------------------------------------------------------------------------
@NAME       : bt_change_case()
@INPUT      : transform - 'u' (uppercase), 'l' (lowercase) or 't' (title
                          capitalization); anything else is reported 
                          via internal_error()
              options   - not examined by this function
@INOUT      : string    - NUL-terminated string, converted in place
@OUTPUT     : 
@RETURNS    : 
@DESCRIPTION: Converts a string (in-place) to either uppercase, lowercase,
              or "title capitalization".  Text inside ordinary braces is
              left untouched; special characters (a depth-0 brace 
              immediately followed by a backslash) are handed to 
              convert_special_char().  Because a foreign-letter
              replacement can be shorter than the control sequence it
              replaces, the result may be shorter than the input; it is
              always NUL-terminated at its new length.
@GLOBALS    : 
@CALLS      : convert_special_char()
@CALLERS    : 
@CREATED    : 1997/11/25, GPW
@MODIFIED   : 
-------------------------------------------------------------------------- */
void
bt_change_case (char   transform,
                char * string,
                ushort options)
{
   int    depth;
   int    src, dst;                     /* indices into string */
   boolean start_sentence;
   boolean after_colon;

   src = dst = 0;
   depth = 0;

   start_sentence = TRUE;
   after_colon = FALSE;

   while (string[src] != 0)
   {
      switch (string[src])
      {
         case '{': 

            /* 
             * At start of special character?  The entire special char.
             * will be handled here, as follows:
             *   - text at any brace-depth within the s.c. is case-mangled;
             *     punctuation (sentence endings, colons) are ignored
             *   - control sequences are left alone, unless they are
             *     one of the "foreign letter" control sequences, in
             *     which case they're converted to the appropriate string
             *     according to the uc_version or lc_version tables.
             * convert_special_char() consumes the matching close brace
             * as well, so depth is deliberately not touched here.
             */
            if (depth == 0 && string[src+1] == '\\')
            {
               convert_special_char (transform, string, &src, &dst, 
                                     &start_sentence, &after_colon);
            }

            /*
             * Otherwise, it's just something in braces.  This is probably
             * a proper noun or something encased in braces to protect it
             * from case-mangling, so we do not case-mangle it.  However,
             * we *do* switch out of start_sentence or after_colon mode if
             * we happen to be there (otherwise we'll do the wrong thing
             * once we're out of the braces).
             */
            else
            {
               string[dst++] = string[src++];
               start_sentence = after_colon = FALSE;
               depth++;
            }
            break;

         case '}':
            string[dst++] = string[src++];
            depth--;
            break;

         /*
          * Sentence-ending punctuation and colons are handled separately
          * to allow for exact mimicking of BibTeX's behaviour.  I happen
          * to think that this behaviour (capitalize first word of sentences
          * in a title) is better than BibTeX's, but I want to keep my
          * options open for a future goal of perfect compatibility.
          */
         case '.':
         case '?':
         case '!':
            start_sentence = TRUE;
            string[dst++] = string[src++];
            break;

         case ':':
            after_colon = TRUE;
            string[dst++] = string[src++];
            break;

         default:
            /* <ctype.h> functions need an unsigned char value */
            if (isspace ((unsigned char) string[src]))
            {
               string[dst++] = string[src++];
            }
            else
            {
               if (depth == 0)
               {
                  switch (transform)
                  {
                     case 'u':
                        string[dst++] = 
                           (char) toupper ((unsigned char) string[src++]);
                        break;
                     case 'l':
                        string[dst++] = 
                           (char) tolower ((unsigned char) string[src++]);
                        break;
                     case 't':
                        if (start_sentence || after_colon)
                        {
                           /* 
                            * XXX BibTeX only preserves case of character
                            * immediately after a colon; I do two things
                            * differently: first, I pay attention to sentence
                            * punctuation, and second I force uppercase
                            * at start of sentence or after a colon.
                            */
                           string[dst++] = 
                              (char) toupper ((unsigned char) string[src++]);
                           start_sentence = after_colon = FALSE;
                        }
                        else
                        {
                           string[dst++] = 
                              (char) tolower ((unsigned char) string[src++]);
                        }
                        break;
                     default:
                        internal_error ("impossible case transform \"%c\"",
                                        transform);
                  }
               } /* depth == 0 */
               else
               {
                  string[dst++] = string[src++];
               }
            } /* not blank */
      } /* switch on current character */
                                  
   } /* while not at end of string */

   /* 
    * dst can lag behind src when a foreign letter was replaced by a
    * shorter string (eg. {\i} -> {I}); terminate at the new length so no
    * stale tail of the original is left behind.
    */
   string[dst] = (char) 0;

} /* bt_change_case */
Added KDE3 version of Tellico git-svn-id: svn://anonsvn.kde.org/home/kde/branches/trinity/applications/tellico@1097620 283d02a7-25f6-0310-bc7c-ecb5cbfe19da 15 years ago			`/* ------------------------------------------------------------------------`
			`@NAME : string_util.c`
			`@DESCRIPTION: Various string-processing utility functions:`
			`bt_purify_string()`
			`bt_change_case()`

			`and their helpers:`
			`foreign_letter()`
			`purify_special_char()`
			`@GLOBALS :`
			`@CALLS :`
			`@CALLERS :`
			`@CREATED : 1997/10/19, Greg Ward`
			`@MODIFIED : 1997/11/25, GPW: renamed to from purify.c to string_util.c`
			`added bt_change_case() and friends`
			`@VERSION : $Id: string_util.c,v 1.10 1999/10/28 22:50:28 greg Rel $`
			`-------------------------------------------------------------------------- */`

			`#include <stdlib.h>`
			`#include <ctype.h>`
			`#include <string.h>`
			`#include <assert.h>`
			`#include "error.h"`
			`#include "btparse.h"`
			`#include "bt_debug.h"`


			`/*`
			`* These definitions should be fixed to be consistent with HTML`
			`* entities, just for fun. And perhaps I should add entries for`
			`* accented letters (at least those supported by TeX and HTML).`
			`*/`
			`typedef enum`
			`{`
			`L_OTHER, /* not a "foreign" letter */`
			`L_OSLASH_L, /* Eastern European {\o} */`
			`L_OSLASH_U,`
			`L_LSLASH_L, /* {\l} */`
			`L_LSLASH_U,`
			`L_OELIG_L, /* Latin {\oe} ligature */`
			`L_OELIG_U,`
			`L_AELIG_L, /* {\ae} ligature */`
			`L_AELIG_U,`
			`L_SSHARP_L, /* German "sharp s" {\ss} */`
			`L_SSHARP_U,`
			`L_ACIRCLE_L, /* Nordic {\aa} */`
			`L_ACIRCLE_U,`
			`L_INODOT_L, /* undotted i: {\i} */`
			`L_JNODOT_L /* {\j} */`
			`} bt_letter;`


			`static const char * uc_version[] =`
			`{`
			`NULL, /* L_OTHER */`
			`"\\O", /* L_OSLASH_L */`
			`"\\O", /* L_OSLASH_U */`
			`"\\L", /* L_LSLASH_L */`
			`"\\L", /* L_LSLASH_U */`
			`"\\OE", /* L_OELIG_L */`
			`"\\OE", /* L_OELIG_U */`
			`"\\AE", /* L_AELIG_L */`
			`"\\AE", /* L_AELIG_U */`
			`"SS", /* L_SSHARP_L -- for LaTeX 2.09 */`
			`"\\SS", /* L_SSHARP_U */`
			`"\\AA", /* L_ACIRCLE_L */`
			`"\\AA", /* L_ACIRCLE_U */`
			`"I", /* L_INODOT_L */`
			`"J" /* L_JNODOT_L */`
			`};`

			`static const char * lc_version[] =`
			`{`
			`NULL, /* L_OTHER */`
			`"\\o", /* L_OSLASH_L */`
			`"\\o", /* L_OSLASH_U */`
			`"\\l", /* L_LSLASH_L */`
			`"\\l", /* L_LSLASH_U */`
			`"\\oe", /* L_OELIG_L */`
			`"\\oe", /* L_OELIG_U */`
			`"\\ae", /* L_AELIG_L */`
			`"\\ae", /* L_AELIG_U */`
			`"\\ss", /* L_SSHARP_L */`
			`"\\ss", /* L_SSHARP_U */`
			`"\\aa", /* L_ACIRCLE_L */`
			`"\\aa", /* L_ACIRCLE_U */`
			`"\\i", /* L_INODOT_L */`
			`"\\j" /* L_JNODOT_L */`
			`};`



			`/* ------------------------------------------------------------------------`
			`@NAME : foreign_letter()`
			`@INPUT : str`
			`start`
			`stop`
			`@OUTPUT : letter`
			`@RETURNS : TRUE if the string delimited by start and stop is a foreign`
			`letter control sequence`
			`@DESCRIPTION: Determines if a character sequence is one of (La)TeX's`
			`"foreign letter" control sequences (l, o, ae, oe, aa, ss, plus`
			uppercase versions). If `letter' is non-NULL, returns which
			`letter was found in it (as a bt_letter value).`
			`@CALLS :`
			`@CALLERS : purify_special_char()`
			`@CREATED : 1997/10/19, GPW`
			`@MODIFIED :`
			`-------------------------------------------------------------------------- */`
			`static boolean`
			`foreign_letter (char str, int start, int stop, bt_letter letter)`
			`{`
			`char c1, c2;`
			`bt_letter dummy;`


			`/*`
			`* This is written for speed, not flexibility -- adding new foreign`
			`* letters would be trying and vexatious.`
			`*`
			`* N.B. my gold standard list of foreign letters is Kopka and Daly's`
			`* A Guide to LaTeX 2e, section 2.5.6.`
			`*/`

			`if (letter == NULL) /* so we can assign to letter /`
			`letter = &dummy; /* without compunctions */`
			`letter = L_OTHER; / assume not a "foreign" letter */`

			`c1 = str[start+0]; /* only two characters that we're */`
			`c2 = str[start+1]; /* interested in */`

			`switch (stop - start)`
			`{`
			`case 1: /* one-character control sequences */`
			`switch (c1) /* (\o and \l) */`
			`{`
			`case 'o':`
			`*letter = L_OSLASH_L; return TRUE;`
			`case 'O':`
			`*letter = L_OSLASH_U; return TRUE;`
			`case 'l':`
			`*letter = L_LSLASH_L; return TRUE;`
			`case 'L':`
			`*letter = L_LSLASH_L; return TRUE;`
			`case 'i':`
			`*letter = L_INODOT_L; return TRUE;`
			`case 'j':`
			`*letter = L_JNODOT_L; return TRUE;`
			`default:`
			`return FALSE;`
			`}`
			`break;`
			`case 2: /* two character control sequences */`
			`switch (c1) /* (\oe, \ae, \aa, and \ss) */`
			`{`
			`case 'o':`
			`if (c2 == 'e') { *letter = L_OELIG_L; return TRUE; }`
			`case 'O':`
			`if (c2 == 'E') { *letter = L_OELIG_U; return TRUE; }`

			`/* BibTeX 0.99 does not handle \aa and \AA -- but I do!*/`
			`case 'a':`
			`if (c2 == 'e')`
			`{ *letter = L_AELIG_L; return TRUE; }`
			`else if (c2 == 'a')`
			`{ *letter = L_ACIRCLE_L; return TRUE; }`
			`else`
			`return FALSE;`
			`case 'A':`
			`if (c2 == 'E')`
			`{ *letter = L_AELIG_U; return TRUE; }`
			`else if (c2 == 'A')`
			`{ *letter = L_ACIRCLE_U; return TRUE; }`
			`else`
			`return FALSE;`

			`/* uppercase sharp-s -- new with LaTeX 2e (so far all I do`
			`* is recognize it as a "foreign" letter)`
			`*/`
			`case 's':`
			`if (c2 == 's')`
			`{ *letter = L_SSHARP_L; return TRUE; }`
			`else`
			`return FALSE;`
			`case 'S':`
			`if (c2 == 'S')`
			`{ *letter = L_SSHARP_U; return TRUE; }`
			`else`
			`return FALSE;`
			`}`
			`break;`
			`default:`
			`return FALSE;`
			`} /* switch on length of control sequence */`

			`internal_error ("foreign_letter(): should never reach end of function");`
			`return FALSE; /* to keep gcc -Wall happy */`

			`} /* foreign_letter */`


			/* ------------------------------------------------------------------------
			@NAME       : purify_special_char()
			@INPUT      : str      - the string being purified (modified in place)
			              src, dst - pointers to the read and write indices into str
			@OUTPUT     : *src - updated to index the closing brace of the
			                     special char (or the terminating NUL if the
			                     braces are unbalanced)
			              *dst - updated to index the next available spot
			                     for copying text to
			@RETURNS    : 
			@DESCRIPTION: "Purifies" a BibTeX special character.  On input, str[*src]
			              must be the opening brace of a special character (ie. the
			              brace must be at depth 0 of the whole string, and the
			              character immediately following it must be a backslash).
			              str[*dst] should be the next spot to copy into the output
			              (purified) string.  purify_special_char() will skip over the
			              opening brace and backslash; if the control sequence is one
			              of LaTeX's foreign letter sequences (as determined by
			              foreign_letter()), then it is copied to the output (with a
			              second letter, if any, forced to lowercase).  Otherwise the
			              control sequence is skipped.  In either case, text after the
			              control sequence is either copied (alphabetic characters) or
			              skipped (anything else, including hyphens, ties, and
			              digits).
			@CALLS      : foreign_letter()
			@CALLERS    : bt_purify_string()
			@CREATED    : 1997/10/19, GPW
			@MODIFIED   : 
			-------------------------------------------------------------------------- */
			static void
			purify_special_char (char * str, int * src, int * dst)
			{
			   int    depth;                        /* brace depth inside special char */
			   int    peek;                         /* index just past control sequence */

			   assert (str[*src] == '{' && str[*src + 1] == '\\');
			   depth = 1;

			   *src += 2;                           /* jump to start of control sequence */
			   peek = *src;                         /* scan to end of control sequence */

			   /* <ctype.h> functions need an unsigned char value, not a plain char */
			   while (isalpha ((unsigned char) str[peek]))
			      peek++;

			   /*
			    * A single-char, non-alpha control sequence (eg. {\'e}) is exactly
			    * one character long.  If the string ends right after the backslash
			    * there is no such character, and we must not step over the
			    * terminating NUL (the loop below would then read past the string).
			    */
			   if (peek == *src && str[peek] != (char) 0)
			      peek++;

			   /* peek == *src only happens at end of string: nothing to look up */
			   if (peek > *src && foreign_letter (str, *src, peek, NULL))
			   {
			      assert (peek - *src == 1 || peek - *src == 2);
			      str[(*dst)++] = str[(*src)++];    /* copy first char */
			      if (*src < peek)                  /* copy second char, downcasing */
			      {
			         str[*dst] = (char) tolower ((unsigned char) str[*src]);
			         (*dst)++;
			         (*src)++;
			      }
			   }
			   else                                 /* not a foreign letter -- skip */
			   {                                    /* the control sequence entirely */
			      *src = peek;
			   }

			   while (str[*src])
			   {
			      switch (str[*src])
			      {
			         case '{':
			            depth++;
			            (*src)++;
			            break;
			         case '}':
			            depth--;
			            if (depth == 0) return;     /* done with special char; *src */
			                                        /* is left on the closing brace */
			            (*src)++;
			            break;
			         default:
			            if (isalpha ((unsigned char) str[*src]))
			               str[(*dst)++] = str[(*src)++];   /* copy alphabetic chars */
			            else                        /* skip everything else */
			               (*src)++;
			      }
			   }

			   /*
			    * If we get here, we have unbalanced braces -- the '}' case should
			    * always hit a depth == 0 point if braces are balanced.  No warning,
			    * though, because a) BibTeX doesn't warn about purifying unbalanced
			    * strings, and b) we (should have) already warned about it in the
			    * lexer.
			    */

			} /* purify_special_char() */


			/* ------------------------------------------------------------------------
			@NAME       : bt_purify_string()
			@INOUT      : instr   - NUL-terminated string, purified in place
			@INPUT      : options - not consulted by this function
			@OUTPUT     : 
			@RETURNS    : nothing; the input string itself is overwritten with the
			              purified text (which is never longer than the input)
			@DESCRIPTION: "Purifies" a BibTeX string.  This consists of copying
			              alphanumeric characters, converting hyphens and ties to
			              space, copying spaces, and skipping everything else.  (Well,
			              almost -- special characters are handled specially, of
			              course.  Basically, accented letters have the control
			              sequence skipped, while foreign letters have the control
			              sequence preserved in a reasonable manner.  See
			              purify_special_char() for details.)
			@CALLS      : purify_special_char()
			@CALLERS    : 
			@CREATED    : 1997/10/19, GPW
			@MODIFIED   : 
			-------------------------------------------------------------------------- */
			void
			bt_purify_string (char * string, ushort options)
			{
			   int    src,                          /* both indices into string */
			          dst;
			   int    depth;                        /* brace depth in string */
			   size_t orig_len;                     /* only used for the sanity check */

			   (void) options;                      /* no option affects purification */

			   /*
			    * Since purification always copies or deletes chars, the result
			    * will be no longer than the input -- so it can be built in place,
			    * with dst never overtaking src.
			    */

			   depth = 0;
			   src = 0;
			   dst = 0;
			   orig_len = strlen (string);

			   DBG_ACTION (1, printf ("bt_purify_string(): input = %p (%s)\n", 
			                          string, string));

			   while (string[src] != (char) 0)
			   {
			      DBG_ACTION (2, printf ("  next: >%c<: ", string[src]));
			      switch (string[src])
			      {
			         case '~':                      /* "separator" characters -- */
			         case '-':                      /* replaced with space */
			         case ' ':                      /* and copy an actual space */
			            string[dst++] = ' ';
			            src++;
			            DBG_ACTION (2, printf ("replacing with space"));
			            break;
			         case '{':
			            if (depth == 0 && string[src+1] == '\\')
			            {
			               DBG_ACTION (2, printf ("special char found"));
			               /*
			                * Leaves src on the closing brace, so the '}' case
			                * below undoes the depth++ that follows.
			                */
			               purify_special_char (string, &src, &dst);
			            }
			            else
			            {
			               DBG_ACTION (2, printf ("ordinary open brace"));
			               src++;
			            }
			            depth++;
			            break;
			         case '}':
			            DBG_ACTION (2, printf ("close brace"));
			            depth--;
			            src++;
			            break;
			         default:
			            /* <ctype.h> needs an unsigned char value, not a plain char */
			            if (isalnum ((unsigned char) string[src]))
			            {
			               DBG_ACTION (2, printf ("alphanumeric -- copying"));
			               string[dst++] = string[src++]; /* copy it */
			            }
			            else                        /* anything else -- skip it */
			            {
			               DBG_ACTION (2, printf ("non-separator, non-brace, non-alpha"));
			               src++;
			            }
			      } /* switch string[src] */

			      DBG_ACTION (2, printf ("\n"));

			   } /* while string[src] */

			   DBG_ACTION (1, printf ("bt_purify_string(): depth on exit: %d\n", depth));

			   string[dst] = (char) 0;              /* cut off leftover tail of input */
			   assert (strlen (string) <= orig_len);
			} /* bt_purify_string() */


			/* ======================================================================
			 * Case-transformation stuff
			 */


			/* ------------------------------------------------------------------------
			@NAME       : convert_special_char()
			@INPUT      : transform - 'u' (uppercase), 'l' (lowercase) or
			                          't' (title caps); anything else is reported
			                          via internal_error()
			@INOUT      : string         - string being converted in place
			              src            - read index; on entry indexes the opening
			                               brace of the special char, on exit indexes
			                               the char after its closing brace (or the
			                               terminating NUL)
			              dst            - write index, advanced past what was written
			              start_sentence - both flags are cleared when a foreign
			              after_colon      letter is uppercased under 't'
			@RETURNS    : 
			@DESCRIPTION: Does case conversion on a special character.  Foreign-letter
			              control sequences are replaced from the uc_version /
			              lc_version tables; other control sequences are copied
			              unchanged; all remaining text is upper- or lowercased.
			@GLOBALS    : uc_version, lc_version
			@CALLS      : foreign_letter()
			@CALLERS    : bt_change_case()
			@CREATED    : 1997/11/25, GPW
			@MODIFIED   : 
			-------------------------------------------------------------------------- */
			static void
			convert_special_char (char transform,
			                      char * string,
			                      int * src,
			                      int * dst,
			                      boolean * start_sentence,
			                      boolean * after_colon)
			{
			   int       depth;
			   boolean   done_special;
			   int       cs_end;
			   int       cs_len;                    /* counting the backslash */
			   bt_letter letter;
			   const char *  repl;
			   int       repl_len;

			#ifndef ALLOW_WARNINGS
			   repl = NULL;                         /* silence "might be used */
			                                        /* uninitialized" warning */
			#endif

			   /* First, copy just the opening brace */
			   string[(*dst)++] = string[(*src)++];

			   /*
			    * Now loop over characters inside the braces -- stop when we reach
			    * the matching close brace, or when the string ends.
			    */
			   depth = 1;                           /* because we're in a special char */
			   done_special = FALSE;

			   while (string[*src] != 0 && !done_special)
			   {
			      switch (string[*src])
			      {
			         case '\\':                     /* a control sequence */
			         {
			            cs_end = *src+1;            /* scan over chars of c.s. */
			            while (isalpha ((unsigned char) string[cs_end]))
			               cs_end++;

			            /*
			             * OK, now *src indexes the backslash (so *src+1 indexes the
			             * first char. of the control sequence), and cs_end indexes
			             * the character immediately following the end of the
			             * control sequence.  Thus we analyze [*src+1..cs_end) to
			             * determine if the control sequence is a foreign letter,
			             * and use (cs_end - *src) as the length of the control
			             * sequence including its backslash.
			             */

			            cs_len = cs_end - *src;     /* length of cs, counting backslash */

			            if (foreign_letter (string, *src+1, cs_end, &letter))
			            {
			               if (letter == L_OTHER)
			                  internal_error ("impossible foreign letter");

			               switch (transform)
			               {
			                  case 'u':
			                     repl = uc_version[(int) letter];
			                     break;
			                  case 'l':
			                     repl = lc_version[(int) letter];
			                     break;
			                  case 't':
			                     if (*start_sentence || *after_colon)
			                     {
			                        repl = uc_version[(int) letter];
			                        *start_sentence = *after_colon = FALSE;
			                     }
			                     else
			                     {
			                        repl = lc_version[(int) letter];
			                     }
			                     break;
			                  default:
			                     internal_error ("impossible case transform \"%c\"",
			                                     transform);
			               }

			               repl_len = (int) strlen (repl);
			               if (repl_len > cs_len)
			                  internal_error
			                     ("replacement text longer than original cs");

			               /*
			                * repl comes from a table outside string, so a plain
			                * memcpy is fine; no NUL is copied.  The replacement may
			                * be shorter than the original, in which case dst falls
			                * behind src.
			                */
			               memcpy (string + *dst, repl, (size_t) repl_len);
			               *src = cs_end;
			               *dst += repl_len;
			            } /* control sequence is a foreign letter */
			            else
			            {
			               /*
			                * Not a foreign letter -- just copy the control seq. as
			                * is.  Source and destination are in the same buffer and
			                * may overlap (or coincide), hence memmove.
			                */
			               memmove (string + *dst, string + *src, (size_t) cs_len);
			               *src += cs_len;
			               assert (*src == cs_end);
			               *dst += cs_len;
			            } /* control sequence not a foreign letter */

			            break;
			         } /* case: '\\' */

			         case '{':
			         {
			            string[(*dst)++] = string[(*src)++];
			            depth++;
			            break;
			         }

			         case '}':
			         {
			            string[(*dst)++] = string[(*src)++];
			            depth--;
			            if (depth == 0)
			               done_special = TRUE;
			            break;
			         }

			         default:                       /* any other character */
			         {
			            switch (transform)
			            {
			               /* 
			                * Inside special chars, lowercase and title caps are same.
			                * (At least, that's bibtex's convention.  I might change this
			                * at some point to be a bit smarter.)
			                */
			               case 'l':
			               case 't':
			                  string[*dst] =
			                     (char) tolower ((unsigned char) string[*src]);
			                  (*dst)++;
			                  (*src)++;
			                  break;
			               case 'u':
			                  string[*dst] =
			                     (char) toupper ((unsigned char) string[*src]);
			                  (*dst)++;
			                  (*src)++;
			                  break;
			               default:
			                  internal_error ("impossible case transform \"%c\"",
			                                  transform);
			            }
			         } /* default char */

			      } /* switch: current char */

			   } /* while: string or special char not done */

			} /* convert_special_char() */


			/* ------------------------------------------------------------------------
			@NAME       : bt_change_case()
			@INPUT      : transform - 'u' (uppercase), 'l' (lowercase) or
			                          't' (title capitalization); anything else is
			                          reported via internal_error()
			              options   - not consulted by this function
			@INOUT      : string    - NUL-terminated string, converted in place
			@OUTPUT     : 
			@RETURNS    : 
			@DESCRIPTION: Converts a string (in-place) to either uppercase, lowercase,
			              or "title capitalization".  Text inside ordinary braces is
			              left alone; special characters (a depth-0 brace followed by
			              a backslash) are handed to convert_special_char().
			@GLOBALS    : 
			@CALLS      : convert_special_char()
			@CALLERS    : 
			@CREATED    : 1997/11/25, GPW
			@MODIFIED   : 
			-------------------------------------------------------------------------- */
			void
			bt_change_case (char   transform,
			                char * string,
			                ushort options)
			{
			   int    depth;
			   int    src, dst;                     /* indices into string */
			   boolean start_sentence;
			   boolean after_colon;

			   (void) options;                      /* no option affects case change */

			   src = dst = 0;
			   depth = 0;

			   start_sentence = TRUE;
			   after_colon = FALSE;

			   while (string[src] != 0)
			   {
			      switch (string[src])
			      {
			         case '{':

			            /*
			             * At start of special character?  The entire special char.
			             * will be handled here, as follows:
			             *   - text at any brace-depth within the s.c. is case-mangled;
			             *     punctuation (sentence endings, colons) are ignored
			             *   - control sequences are left alone, unless they are
			             *     one of the "foreign letter" control sequences, in
			             *     which case they're converted to the appropriate string
			             *     according to the uc_version or lc_version tables.
			             */
			            if (depth == 0 && string[src+1] == '\\')
			            {
			               convert_special_char (transform, string, &src, &dst,
			                                     &start_sentence, &after_colon);
			            }

			            /*
			             * Otherwise, it's just something in braces.  This is probably
			             * a proper noun or something encased in braces to protect it
			             * from case-mangling, so we do not case-mangle it.  However,
			             * we *do* switch out of start_sentence or after_colon mode if
			             * we happen to be there (otherwise we'll do the wrong thing
			             * once we're out of the braces).
			             */
			            else
			            {
			               string[dst++] = string[src++];
			               start_sentence = after_colon = FALSE;
			               depth++;
			            }
			            break;

			         case '}':
			            string[dst++] = string[src++];
			            depth--;
			            break;

			         /*
			          * Sentence-ending punctuation and colons are handled separately
			          * to allow for exact mimicking of BibTeX's behaviour.  I happen
			          * to think that this behaviour (capitalize first word of sentences
			          * in a title) is better than BibTeX's, but I want to keep my
			          * options open for a future goal of perfect compatibility.
			          */
			         case '.':
			         case '?':
			         case '!':
			            start_sentence = TRUE;
			            string[dst++] = string[src++];
			            break;

			         case ':':
			            after_colon = TRUE;
			            string[dst++] = string[src++];
			            break;

			         default:
			            /* <ctype.h> needs an unsigned char value, not a plain char */
			            if (isspace ((unsigned char) string[src]))
			            {
			               string[dst++] = string[src++];
			            }
			            else
			            {
			               if (depth == 0)
			               {
			                  switch (transform)
			                  {
			                     case 'u':
			                        string[dst++] =
			                           (char) toupper ((unsigned char) string[src++]);
			                        break;
			                     case 'l':
			                        string[dst++] =
			                           (char) tolower ((unsigned char) string[src++]);
			                        break;
			                     case 't':
			                        if (start_sentence || after_colon)
			                        {
			                           /*
			                            * XXX BibTeX only preserves case of character
			                            * immediately after a colon; I do two things
			                            * differently: first, I pay attention to sentence
			                            * punctuation, and second I force uppercase
			                            * at start of sentence or after a colon.
			                            */
			                           string[dst++] =
			                              (char) toupper ((unsigned char) string[src++]);
			                           start_sentence = after_colon = FALSE;
			                        }
			                        else
			                        {
			                           string[dst++] =
			                              (char) tolower ((unsigned char) string[src++]);
			                        }
			                        break;
			                     default:
			                        internal_error ("impossible case transform \"%c\"",
			                                        transform);
			                  }
			               } /* depth == 0 */
			               else
			               {
			                  string[dst++] = string[src++];
			               }
			            } /* not blank */
			      } /* switch on current character */

			   } /* while not at end of string */

			   /*
			    * convert_special_char() may write a replacement that is shorter than
			    * the control sequence it consumed (it only rejects longer ones), so
			    * dst can end up behind src.  Terminate at dst so no stale characters
			    * from the original string trail the result.
			    */
			   string[dst] = (char) 0;

			} /* bt_change_case */