You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
tdelibs/tdespell2/plugins/ispell/good.cpp

431 lines
13 KiB

/* enchant
* Copyright (C) 2003 Dom Lachowicz
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the
* Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
* Boston, MA 02110-1301, USA.
*
* In addition, as a special exception, Dom Lachowicz
* gives permission to link the code of this program with
* non-LGPL Spelling Provider libraries (eg: a MSFT Office
* spell checker backend) and distribute linked combinations including
* the two. You must obey the GNU Lesser General Public License in all
* respects for all of the code used other than said providers. If you modify
* this file, you may extend this exception to your version of the
* file, but you are not obligated to do so. If you do not wish to
* do so, delete this exception statement from your version.
*/
/*
* good.c - see if a word or its root word
* is in the dictionary.
*
* Pace Willisson, 1983
*
* Copyright 1992, 1993, Geoff Kuenning, Granada Hills, CA
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All modifications to the source code must be clearly marked as
* such. Binary redistributions based on modified source code
* must be clearly marked as modified versions in the documentation
* and/or other materials provided with the distribution.
* 4. All advertising materials mentioning features or use of this software
* must display the following acknowledgment:
* This product includes software developed by Geoff Kuenning and
* other unpaid contributors.
* 5. The name of Geoff Kuenning may not be used to endorse or promote
* products derived from this software without specific prior
* written permission.
*
* THIS SOFTWARE IS PROVIDED BY GEOFF KUENNING AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL GEOFF KUENNING OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
/*
* $Log$
* Revision 1.1 2004/01/31 16:44:12 zrusin
* ISpell plugin.
*
* Revision 1.4 2003/08/14 17:51:26 dom
* update license - exception clause should be Lesser GPL
*
* Revision 1.3 2003/07/28 20:40:25 dom
* fix up the license clause, further win32-registry proof some directory getting functions
*
* Revision 1.2 2003/07/16 22:52:37 dom
* LGPL + exception license
*
* Revision 1.1 2003/07/15 01:15:04 dom
* ispell enchant backend
*
* Revision 1.2 2003/01/29 05:50:11 hippietrail
*
* Fixed my mess in EncodingManager.
* Changed many C casts to C++ casts.
*
* Revision 1.1 2003/01/24 05:52:32 hippietrail
*
* Refactored ispell code. Old ispell global variables had been put into
* an allocated structure, a pointer to which was passed to many functions.
* I have now made all such functions and variables private members of the
* ISpellChecker class. It was C OO, now it's C++ OO.
*
* I've fixed the makefiles and tested compilation but am unable to test
* operation. Please back out my changes if they cause problems which
* are not obvious or easy to fix.
*
* Revision 1.6 2003/01/06 18:48:38 dom
* ispell cleanup, start of using new 'add' save features
*
* Revision 1.5 2002/09/19 05:31:15 hippietrail
*
* More Ispell cleanup. Conditional globals and DEREF macros are removed.
* K&R function declarations removed, converted to Doxygen style comments
* where possible. No code has been changed (I hope). Compiles for me but
* unable to test.
*
* Revision 1.4 2002/09/17 03:03:29 hippietrail
*
* After seeking permission on the developer list I've reformatted all the
* spelling source which seemed to have parts which used 2, 3, 4, and 8
* spaces for tabs. It should all look good with our standard 4-space
* tabs now.
* I've concentrated just on indentation in the actual code. More prettying
* could be done.
* * NO code changes were made *
*
* Revision 1.3 2002/09/13 17:20:12 mpritchett
* Fix more warnings for Linux build
*
* Revision 1.2 2001/05/12 16:05:42 thomasf
* Big pseudo changes to ispell to make it pass around a structure rather
* than rely on all sorts of globals willy nilly here and there. Also
* fixed our spelling class to work with accepting suggestions once more.
* This code is dirty, gross and ugly (not to mention still not supporting
* multiple hash sizes just yet) but it works on my machine and will no
* doubt break other machines.
*
* Revision 1.1 2001/04/15 16:01:24 tomas_f
* moving to spell/xp
*
* Revision 1.5 2000/02/09 22:35:25 sterwill
* Clean up some warnings
*
* Revision 1.4 1998/12/29 14:55:32 eric
*
* I've doctored the ispell code pretty extensively here. It is now
* warning-free on Win32. It also *works* on Win32 now, since I
* replaced all the I/O calls with ANSI standard ones.
*
* Revision 1.3 1998/12/28 23:11:30 eric
*
* modified spell code and integration to build on Windows.
* This is still a hack.
*
* Actually, it doesn't yet WORK on Windows. It just builds.
* SpellCheckInit is failing for some reason.
*
* Revision 1.2 1998/12/28 22:16:22 eric
*
* These changes begin to incorporate the spell checker into AbiWord. Most
* of this is a hack.
*
* 1. added other/spell to the -I list in config/abi_defs
* 2. replaced other/spell/Makefile with one which is more like
* our build system.
* 3. added other/spell to other/Makefile so that the build will now
* dive down and build the spell check library.
* 4. added the AbiSpell library to the Makefiles in wp/main
* 5. added a call to SpellCheckInit in wp/main/unix/UnixMain.cpp.
* This call is a HACK and should be replaced with something
* proper later.
* 6. added code to fv_View.cpp as follows:
* whenever you double-click on a word, the spell checker
* verifies that word and prints its status to stdout.
*
* Caveats:
* 1. This will break the Windows build. I'm going to work on fixing it
* now.
* 2. This only works if your dictionary is in /usr/lib/ispell/american.hash.
* The dictionary location is currently hard-coded. This will be
* fixed as well.
*
* Anyway, such as it is, it works.
*
* Revision 1.1 1998/12/28 18:04:43 davet
* Spell checker code stripped from ispell. At this point, there are
* two external routines... the Init routine, and a check-a-word routine
* which returns a boolean value, and takes a 16 bit char string.
* The code resembles the ispell code as much as possible still.
*
* Revision 1.43 1994/11/02 06:56:05 geoff
* Remove the anyword feature, which I've decided is a bad idea.
*
* Revision 1.42 1994/10/25 05:45:59 geoff
* Add support for an affix that will work with any word, even if there's
* no explicit flag.
*
* Revision 1.41 1994/05/24 06:23:06 geoff
* Let tgood decide capitalization questions, rather than doing it ourselves.
*
* Revision 1.40 1994/05/17 06:44:10 geoff
* Add support for controlled compound formation and the COMPOUNDONLY
* option to affix flags.
*
* Revision 1.39 1994/01/25 07:11:31 geoff
* Get rid of all old RCS log lines in preparation for the 3.1 release.
*
*/
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "ispell_checker.h"
/*
 * Forward declaration of a free-standing good() taking the same five
 * arguments as ISpellChecker::good() defined further down.  Only the
 * member function is defined in the visible part of this file.
 * NOTE(review): P(()) is presumably the prototype-wrapping macro supplied
 * by the ispell headers -- confirm, and confirm whether this declaration
 * is still needed now that good() is a class member.
 */
int good P ((ichar_t * word, int ignoreflagbits, int allhits,
int pfxopts, int sfxopts));
#ifndef NO_CAPITALIZATION_SUPPORT
/*!
** Check whether one capitalization variant (dent) permits the affixes
** that were recorded for a hit.
**
** An affix that is absent from the hit (null pointer) is always
** acceptable; an affix that is present must have its flag bit set in
** the entry's mask.
**
** \param dent Dictionary entry whose flag mask is tested
** \param hit  Hit carrying the optional prefix and suffix
**
** \return Nonzero (1) when both affixes are permitted, 0 otherwise
*/
static int entryhasaffixes (struct dent *dent, struct success *hit)
{
    /* Short-circuit order matches prefix-then-suffix testing. */
    return (!hit->prefix || TSTMASKBIT (dent->mask, hit->prefix->flagbit))
        && (!hit->suffix || TSTMASKBIT (dent->mask, hit->suffix->flagbit));
}
/*!
 * Decide whether the capitalization of a word is acceptable for a
 * dictionary hit.
 *
 * The capitalization class of the word is obtained from whatcap().  An
 * ALLCAPS word is accepted at once.  Otherwise the entries reachable
 * from hit->dictent are visited, following dent->next for as long as
 * the current entry has MOREVARIANTS set, until one is found whose
 * capitalization is compatible with the word and whose flag mask
 * permits the affixes of the hit (see entryhasaffixes()).
 *
 * \param word Word whose capitalization is being judged
 * \param hit  Hit to validate: dictionary entry plus optional affixes
 * \param len  Length of word; used to derive the length of the root
 *             part once the added affix lengths are subtracted
 *
 * \return 1 if some variant accepts this capitalization, 0 otherwise
 */
int ISpellChecker::cap_ok (ichar_t *word, struct success *hit, int len)
{
    const long wordcap = whatcap (word);

    /* A fully upper-case spelling is legal whatever the affixes are. */
    if (wordcap == ALLCAPS)
        return 1;

    /*
    ** The affix lengths are only consulted by the exact followcase
    ** comparison, so they stay zero for every other capitalization.
    */
    int pfxadded = 0;
    int pfxstripped = 0;
    int sfxadded = 0;
    if (wordcap == FOLLOWCASE)
    {
        if (hit->prefix)
        {
            pfxadded = hit->prefix->affl;
            pfxstripped = hit->prefix->stripl;
        }
        if (hit->suffix)
            sfxadded = hit->suffix->affl;
    }

    /*
    ** Walk the capitalization variants.  wordcap cannot be ALLCAPS
    ** here because that case has already returned.
    */
    for (struct dent *entry = hit->dictent; ; entry = entry->next)
    {
        const long entrycap = captype (entry->flagfield);
        bool capmatch;

        if (entrycap != wordcap)
        {
            /* Only an any-case entry may stand in for a capitalized word. */
            capmatch = (entrycap == ANYCASE && wordcap == CAPITALIZED);
        }
        else if (wordcap != FOLLOWCASE)
        {
            capmatch = true;
        }
        else
        {
            /*
            ** Followcase must agree exactly with the entry, which is
            ** complicated by affixes: compare prefix, root and suffix
            ** one after another, giving up at the first mismatch.
            */
            ichar_t entryword[INPUTWORDLEN + MAXAFFIXLEN];
            strtoichar (entryword, entry->word, INPUTWORDLEN, 1);

            ichar_t *wp = word;
            capmatch = true;

            /* Prefix: its case must follow the first root character. */
            ichar_t *const pfxend = word + pfxadded;
            const bool pfxupper = myupper (entryword[pfxstripped]) != 0;
            for ( ; capmatch && wp < pfxend; ++wp)
            {
                if (pfxupper ? mylower (*wp) : myupper (*wp))
                    capmatch = false;
            }

            /* Root: must be identical, character for character. */
            ichar_t *dp = entryword + pfxstripped;
            ichar_t *const rootend = dp + (len - pfxadded - sfxadded);
            while (capmatch && dp < rootend)
            {
                if (*dp++ != *wp++)
                    capmatch = false;
            }

            /* Suffix: its case must follow the last root character. */
            if (capmatch)
            {
                const bool sfxupper = myupper (rootend[-1]) != 0;
                for ( ; capmatch && *wp; ++wp)
                {
                    if (sfxupper ? mylower (*wp) : myupper (*wp))
                        capmatch = false;
                }
            }
        }

        if (capmatch && entryhasaffixes (entry, hit))
            return 1;

        if ((entry->flagfield & MOREVARIANTS) == 0)
            break;
    }

    /* No matches found */
    return 0;
}
#endif
#ifndef NO_CAPITALIZATION_SUPPORT
/*!
 * See whether a word, or a root obtained by stripping affixes from it,
 * is in the dictionary.
 *
 * An uppercase copy of the word is looked up with ispell_lookup().  A
 * direct hit is recorded in m_hits[0] (with no prefix and no suffix)
 * and counted in m_numhits; when capitalization support is compiled in
 * it is only counted if allhits is nonzero or cap_ok() accepts the
 * word's capitalization.  Unless a hit was found and allhits is zero,
 * chk_aff() is then called to try stripping off affixes.
 *
 * A word too long for the internal uppercase buffer
 * (INPUTWORDLEN + MAXAFFIXLEN characters including the terminator) is
 * reported as not found instead of overrunning that buffer.
 *
 * \param w Word to look up
 * \param ignoreflagbits NZ to ignore affix flags in dict
 * \param allhits NZ to ignore case, get every hit
 * \param pfxopts Options to apply to prefixes
 * \param sfxopts Options to apply to suffixes
 *
 * \return 1 when the word itself was found and allhits is zero,
 *         otherwise the value of m_numhits after affix checking
 *         (0 when nothing matched or the word was too long)
 */
int ISpellChecker::good (ichar_t *w, int ignoreflagbits, int allhits, int pfxopts, int sfxopts)
#else
/* ARGSUSED */
int ISpellChecker::good (ichar_t *w, int ignoreflagbits, int dummy, int pfxopts, int sfxopts)
#endif
{
#ifdef NO_CAPITALIZATION_SUPPORT
    /*
    ** Without capitalization support the third argument is unused, yet
    ** the code below still refers to allhits.  Pin it to zero so this
    ** configuration compiles and stops at the first hit.
    */
    const int allhits = 0;
    (void) dummy;
#endif
    ichar_t nword[INPUTWORDLEN + MAXAFFIXLEN];
    ichar_t * const nwordlast = nword + (sizeof nword / sizeof nword[0]) - 1;
    ichar_t * p;
    ichar_t * q;
    int n;
    struct dent * dp;

    m_numhits = 0;

    /*
    ** Make an uppercase copy of the word we are checking.  nwordlast is
    ** reserved for the terminator, so stop before writing past it.
    */
    for (p = w, q = nword; *p; )
    {
        if (q >= nwordlast)
            return 0; /* too long to be checked: treat as not found */
        *q++ = mytoupper (*p++);
    }
    *q = 0;
    n = static_cast<int>(q - nword);

    if ((dp = ispell_lookup (nword, 1)) != NULL)
    {
        m_hits[0].dictent = dp;
        m_hits[0].prefix = NULL;
        m_hits[0].suffix = NULL;
#ifndef NO_CAPITALIZATION_SUPPORT
        if (allhits || cap_ok (w, &m_hits[0], n))
            m_numhits = 1;
#else
        m_numhits = 1;
#endif
    }

    /* A direct hit is enough unless the caller asked for every hit. */
    if (m_numhits && !allhits)
        return 1;

    /* try stripping off affixes */
    chk_aff (w, nword, n, ignoreflagbits, allhits, pfxopts, sfxopts);
    return m_numhits;
}