#include "tdestringmatcher.h"
#include "tequivchars.h"

// NOTE(review): the names of the next three headers were lost when this file was
// extracted; they are reconstructed from the symbols used below (TQRegExp,
// TQValueVector, kdWarning) -- confirm against the original file.
#include <tqregexp.h>
#include <tqvaluevector.h>
#include <kdebug.h>

#if __has_include(<features.h>) // C++17
#pragma message "Using features.h to check for __GLIBC__"
#include <features.h>
#endif

#ifdef __GLIBC__
#include <fnmatch.h>
#pragma message "TSM using GLIBC fnmatch() for wildcard matching"
#endif

//================================================================================================

/**
 * Per-match-specification data derived from a MatchSpec when it is accepted by
 * setMatchSpecs(). One AuxData entry is stored for every entry of the match
 * specification list, at the same index.
 *
 * AuxData does not free @c matchEngine itself: copies of an AuxData share the
 * pointer, and TDEStringMatcherPrivate::clearAll() is the code that deletes it.
 */
class AuxData
{
  public:
    AuxData();

    TQString  patternConverted; // Pattern converted from original (e.g. ANCHandling::EQUIVALENCE)
    TQRegExp* matchEngine;      // Used when PatternType::REGEX (and WILDCARD without GLIBC)
#ifdef __GLIBC__
    int       fnmatchFlags;     // Used by fnmatch() when PatternType::WILDCARD
#endif
    bool      isCaseSensitive;  // PatternType::SUBSTRING
};

/** Creates an entry with a null pattern, no regex engine, and case-sensitive matching. */
AuxData::AuxData()
  : patternConverted( TQString::null ),
    matchEngine( nullptr ),
#ifdef __GLIBC__
    fnmatchFlags( FNM_EXTMATCH ), // Bash shell option 'extglob'
#endif
    isCaseSensitive( true )
{
}

//================================================================================================

typedef TQValueVector<AuxData> AuxDataList;

/**
 * Private state of a TDEStringMatcher: the match specifications in both of their
 * external representations plus the derived AuxData list.
 */
class TDEStringMatcher::TDEStringMatcherPrivate
{
  public:
    // Properties that may be set / accessed through the TSM interface
    TQString      m_matchSpecString;
    MatchSpecList m_matchSpecList;

    // Properties that are internal implementation only
    AuxDataList   m_auxData;

    void clearAll();
};

/**
 * Resets all properties and deletes every regex engine referenced from m_auxData.
 */
void TDEStringMatcher::TDEStringMatcherPrivate::clearAll()
{
    m_matchSpecString = TQString::null;
    m_matchSpecList.clear();
    for ( size_t index = 0 ; index < m_auxData.count() ; index++ ) {
        if ( m_auxData[index].matchEngine != nullptr ) {
            TSMTRACE << "Freeing regex match engine " << m_auxData[index].matchEngine << endl;
            delete m_auxData[index].matchEngine;
        }
    }
    m_auxData.clear();
}

//================================================================================================

/** Creates a matcher with no match specifications. */
TDEStringMatcher::TDEStringMatcher()
{
    TSMTRACE << "TSM::TDEStringMatcher(): New instance created: " << this << endl;
    d = new TDEStringMatcherPrivate;
}

/** Releases the regex engines and the private data. */
TDEStringMatcher::~TDEStringMatcher()
{
    d->clearAll();
    delete d;
    TSMTRACE << "TSM::~TDEStringMatcher(): Instance destroyed: " << this << endl;
}

//================================================================================================
// Match specification output functions
//================================================================================================

/** Returns the current match specifications in their string form. */
const TQString TDEStringMatcher::getMatchSpecString() const
{
    return d->m_matchSpecString;
}

/** Returns the current match specifications in their list form. */
const TDEStringMatcher::MatchSpecList TDEStringMatcher::getMatchSpecs() const
{
    return d->m_matchSpecList;
}

//================================================================================================
// Match specification input functions
//================================================================================================

/**
 * Replaces the current match specifications with @p newMatchSpecList.
 *
 * Every specification is validated first; the stored state is only replaced (and
 * patternsChanged() only emitted) when all of them are acceptable.
 *
 * @param newMatchSpecList specifications to install
 * @return true if the list was accepted; false if any pattern is empty, contains
 *         PatternStringDivider, has an out-of-range pattern type or character
 *         handling, or has invalid wildcard / regex syntax. On false the
 *         previously stored specifications are left untouched.
 */
bool TDEStringMatcher::setMatchSpecs( MatchSpecList newMatchSpecList )
{
    TDEStringMatcherPrivate workArea;
    TQStringList newMatchSpecs;

    TSMTRACE << "TSM::setPatterns(): validating match specification list" << endl;

    for ( const MatchSpec& matchSpec : newMatchSpecList ) {

        if ( matchSpec.pattern.isEmpty() ) {
            TSMTRACE << " Error: empty pattern!" << endl;
            workArea.clearAll();
            return false;
        }

        if ( matchSpec.pattern.find( TQChar(PatternStringDivider) ) >= 0 ) {
            TSMTRACE << " Error: pattern contains reserved separator character" << endl;
            workArea.clearAll();
            return false;
        }

        AuxData auxWork;
        TQString inferredOptionString;

        // Validate / process PatternType
        auxWork.patternConverted = matchSpec.pattern;
        switch ( matchSpec.patternType ) {
            case PatternType::WILDCARD :
                inferredOptionString += TQChar('w');
#ifndef __GLIBC__
                auxWork.patternConverted = wildcardToRegex( auxWork.patternConverted );
                TSMTRACE << " Converted wildcard expression '" << matchSpec.pattern << "' to regex '" << auxWork.patternConverted << "'" << endl;
#endif
                break;
            case PatternType::REGEX :
                inferredOptionString += TQChar('r');
                break;
            case PatternType::SUBSTRING :
                inferredOptionString += TQChar('s');
                break;
            default:
                TSMTRACE << " Error: pattern type out of range" << endl;
                workArea.clearAll();
                return false;
        }

        // Validate / process ANCHandling
        TQString before = auxWork.patternConverted;
        switch ( matchSpec.ancHandling ) {
            case ANCHandling::CASE_SENSITIVE :
                inferredOptionString += TQChar('c');
                auxWork.isCaseSensitive = true;
                break;
            case ANCHandling::CASE_INSENSITIVE :
                inferredOptionString += TQChar('i');
                auxWork.isCaseSensitive = false;
#ifdef __GLIBC__
                auxWork.fnmatchFlags |= FNM_CASEFOLD;
#endif
                break;
            case ANCHandling::EQUIVALENCE :
                inferredOptionString += TQChar('e');
                auxWork.isCaseSensitive = true;
                auxWork.patternConverted = TEquivChars::replaceChars( auxWork.patternConverted, true );
                TSMTRACE << " Converted match pattern '" << before << "' to equivalent '" << auxWork.patternConverted << "'" << endl;
                break;
            default:
                TSMTRACE << " Error: alphabetic character handling specification out of range" << endl;
                workArea.clearAll();
                return false;
        }

        if ( matchSpec.expectMatch )
            inferredOptionString += TQChar('=');
        else
            inferredOptionString += TQChar('!');

        // Test validity of pattern
        TQRegExp rxWork;
        switch ( matchSpec.patternType ) {
            case PatternType::WILDCARD :
#ifdef __GLIBC__
            {
                // Test wildcard expression using a subject matter expert: the
                // pattern is matched against itself, and only the "match" and
                // "no match" results count as valid syntax.
                const int result = fnmatch(
                    auxWork.patternConverted.local8Bit().data(),
                    auxWork.patternConverted.local8Bit().data(),
                    auxWork.fnmatchFlags
                );
                switch ( result ) {
                    case 0:           // matched
                    case FNM_NOMATCH: // not matched
                        break;
                    default:
                        TSMTRACE << " Error: invalid wildcard syntax" << endl;
                        workArea.clearAll();
                        return false;
                }
                break;
            }
#else
                // Wildcard expression was converted to regex during earlier PatternType
                // processing and will be subsequently validated as such (fall through).
#endif
            case PatternType::REGEX :
                // Prepare regex
                rxWork.setPattern( auxWork.patternConverted );
                rxWork.setCaseSensitive( auxWork.isCaseSensitive );
                // Test regex
                if ( rxWork.isValid() ) {
                    // Ownership passes to workArea.m_auxData when auxWork is appended below
                    auxWork.matchEngine = new TQRegExp;
                    *auxWork.matchEngine = rxWork;
                    TSMTRACE << "AuxData: Allocated regex engine for matching '" << auxWork.matchEngine->pattern() << "'" << endl;
                }
                else {
                    TSMTRACE << " Error: invalid regex syntax'" << endl;
                    workArea.clearAll();
                    return false;
                }
                break;
            case PatternType::SUBSTRING :
                // Any non-empty string is a valid substring pattern
                break;
        }

        // This particular match specification is good
        newMatchSpecs.append( inferredOptionString );
        newMatchSpecs.append( matchSpec.pattern );
        workArea.m_auxData.append( auxWork );
    }

    // All proposed match specifications are good, update everything accordingly
    workArea.m_matchSpecList = newMatchSpecList;
    workArea.m_matchSpecString = newMatchSpecs.join( TQChar(PatternStringDivider) );
    d->clearAll();
    *d = workArea;

    //-Debug: TSMTRACE << " Notifying slots of pattern change" << endl;
    emit patternsChanged();
    //-Debug: TSMTRACE << " All slots have been notified" << endl;

    TSMTRACE << "TSM::setPatterns(): Patterns were successfully regenerated from list" << endl << endl;
    return true;
}

//=================================================================================================

/**
 * Replaces the current match specifications with those encoded in
 * @p newMatchSpecString.
 *
 * The string is split on PatternStringDivider into alternating option strings and
 * pattern strings. Options not mentioned in an option string keep the value they
 * had for the previous pattern (the first pattern starts from the DEFAULT values).
 * An empty string clears all match specifications.
 *
 * @param newMatchSpecString encoded specifications
 * @return true if the string equals the stored one, is empty, or was parsed and
 *         validated successfully; false otherwise, in which case the previously
 *         stored specifications are left untouched.
 */
bool TDEStringMatcher::setMatchSpecs( TQString newMatchSpecString )
{
    if ( newMatchSpecString == d->m_matchSpecString ) return true;

    TDEStringMatcherPrivate workArea;
    MatchSpec matchSpec = {
        PatternType::DEFAULT,
        ANCHandling::DEFAULT,
        true, // seeking matches, not non-matches
        ""
    };

    TSMTRACE << "TSM::setPatterns: Proposed match specification string: <" << newMatchSpecString << ">" << endl;

    if ( newMatchSpecString.isEmpty() ) {
        TSMTRACE << " Empty pattern string => match specifications will be cleared" << endl;
        // clearAll() also releases the auxiliary data (including any regex engines)
        // that belonged to the specifications being discarded.
        d->clearAll();
        d->m_matchSpecString = ""; // stored as empty rather than null, as before
        emit patternsChanged();
        return true;
    }

    TQStringList newMatchSpecs = TQStringList::split( PatternStringDivider, newMatchSpecString, true );

    if ( newMatchSpecs.count() % 2 != 0 ) {
        TSMTRACE << " Error: match specification string must contain an even number of components" << endl;
        return false;
    }

    bool processingOptionString = true; // expected format: option string , pattern string, ...

    for ( TQString &specification : newMatchSpecs ) {

        if ( processingOptionString ) {
            specification = specification.lower();
            TSMTRACE << " Processing match option string: '" << specification << "'" << endl;
            const int optionCount = static_cast<int>( specification.length() );
            for ( int i = 0 ; i < optionCount ; i++ ) {
                TQChar optionChar = specification[i];
                //Debug: TSMTRACE << " Option character: '" << optionChar << "'" << endl;
                switch ( optionChar ) {
                    case 'r' : matchSpec.patternType = PatternType::REGEX ; break;
                    case 'w' : matchSpec.patternType = PatternType::WILDCARD ; break;
                    case 's' : matchSpec.patternType = PatternType::SUBSTRING ; break;
                    case 'c' : matchSpec.ancHandling = ANCHandling::CASE_SENSITIVE ; break;
                    case 'i' : matchSpec.ancHandling = ANCHandling::CASE_INSENSITIVE; break;
                    case 'e' : matchSpec.ancHandling = ANCHandling::EQUIVALENCE ; break;
                    case '=' : matchSpec.expectMatch = true ; break;
                    case '!' : matchSpec.expectMatch = false ; break;
                    default:
                        // We reserve ALL other possible option characters for future use!
                        TSMTRACE << " Error: invalid option character" << endl;
                        workArea.clearAll();
                        return false;
                }
            }
            processingOptionString = false; // next spec should be a pattern string
        } // processingOptionString

        else { // ! processingOptionString
            TSMTRACE << " Processing match pattern string: '" << specification << "'" << endl;

            if ( specification.isEmpty() ) {
                TSMTRACE << " Error: empty pattern!" << endl;
                workArea.clearAll();
                return false;
            }

            AuxData auxWork;

            // Validate / process PatternType
            auxWork.patternConverted = specification;
            switch ( matchSpec.patternType ) {
                case PatternType::WILDCARD :
#ifndef __GLIBC__
                    auxWork.patternConverted = wildcardToRegex( specification );
                    TSMTRACE << " Converted wildcard expression '" << specification << "' to regex '" << auxWork.patternConverted << "'" << endl;
                    break;
#endif
                case PatternType::REGEX :
                case PatternType::SUBSTRING :
                    break;
                default :
                    // This should never arise since the content of this field was set within this function
                    kdWarning() << "Error while processing '" << specification
                                << "' pattern type out of range: " << static_cast<uchar>( matchSpec.patternType ) << endl;
                    workArea.clearAll();
                    return false;
            }

            // Validate / process ANCHandling
            TQString before = auxWork.patternConverted;
            switch ( matchSpec.ancHandling ) {
                case ANCHandling::CASE_SENSITIVE :
                    auxWork.isCaseSensitive = true;
                    break;
                case ANCHandling::CASE_INSENSITIVE :
                    auxWork.isCaseSensitive = false;
#ifdef __GLIBC__
                    auxWork.fnmatchFlags |= FNM_CASEFOLD;
#endif
                    break;
                case ANCHandling::EQUIVALENCE :
                    auxWork.isCaseSensitive = true;
                    auxWork.patternConverted = TEquivChars::replaceChars( auxWork.patternConverted, true );
                    TSMTRACE << " Converted match pattern '" << before << "' to equivalent '" << auxWork.patternConverted << "'" << endl;
                    break;
                default:
                    // Reject out-of-range values, as the list overload does. This
                    // should never arise since the field is only set within this
                    // function (assumes ANCHandling::DEFAULT names one of the
                    // values handled above -- confirm in the header).
                    kdWarning() << "Error while processing '" << specification
                                << "' alphabetic character handling specification out of range: "
                                << static_cast<uchar>( matchSpec.ancHandling ) << endl;
                    workArea.clearAll();
                    return false;
            }

            // Test validity of pattern
            TQRegExp rxWork;
            switch ( matchSpec.patternType ) {
                case PatternType::WILDCARD :
#ifdef __GLIBC__
                {
                    // Test wildcard expression using a subject matter expert: the
                    // pattern is matched against itself, and only the "match" and
                    // "no match" results count as valid syntax.
                    const int result = fnmatch(
                        auxWork.patternConverted.local8Bit().data(),
                        auxWork.patternConverted.local8Bit().data(),
                        auxWork.fnmatchFlags
                    );
                    switch ( result ) {
                        case 0:           // matched
                        case FNM_NOMATCH: // not matched
                            break;
                        default:
                            TSMTRACE << " Error: invalid wildcard syntax" << endl;
                            workArea.clearAll();
                            return false;
                    }
                    break;
                }
#endif
                // Otherwise we will test wildcard expression as one converted to a regex
                case PatternType::REGEX :
                    // Prepare regex
                    rxWork.setPattern( auxWork.patternConverted );
                    rxWork.setCaseSensitive( auxWork.isCaseSensitive );
                    // Test regex
                    if ( rxWork.isValid() ) {
                        // Ownership passes to workArea.m_auxData when auxWork is appended below
                        auxWork.matchEngine = new TQRegExp;
                        *auxWork.matchEngine = rxWork;
                        TSMTRACE << " AuxData: Allocated regex engine " << auxWork.matchEngine << " for pattern: " << auxWork.matchEngine->pattern() << endl;
                    }
                    else {
                        TSMTRACE << " Error: invalid regex syntax" << endl;
                        workArea.clearAll();
                        return false;
                    }
                    break;
                case PatternType::SUBSTRING :
                    // Any non-empty string is a valid substring pattern
                    break;
            }

            matchSpec.pattern = specification;
            workArea.m_matchSpecList.push_back( matchSpec );
            workArea.m_auxData.append( auxWork );
            processingOptionString = true; // next spec should be an option string
        } // ! processingOptionString completed
    }

    workArea.m_matchSpecString = newMatchSpecString;
    d->clearAll();
    *d = workArea;
    TSMTRACE << " Final patternString: '" << d->m_matchSpecString << "'" << endl;
    TSMTRACE << " Number of match patterns in list: '" << d->m_matchSpecList.count() << "'" << endl;

    //-Debug: TSMTRACE << " Notifying slots of pattern change" << endl;
    emit patternsChanged();
    //-Debug: TSMTRACE << " All slots have been notified" << endl;

    TSMTRACE << "TSM::setPatterns(): Patterns were successfully regenerated from string" << endl << endl;
    return true;
}

//================================================================================================
// Match functions
//================================================================================================

/**
 * Tests @p stringToMatch against the stored match specifications, stopping at the
 * first one whose outcome (match / no match) equals its expectMatch setting.
 *
 * @param stringToMatch string to test
 * @return true as soon as one specification turns out as expected; false if none
 *         does, which includes the case of an empty specification list.
 */
bool TDEStringMatcher::matchAny( const TQString& stringToMatch ) const
{
    /* DEBUG
    TSMTRACE << "TSM:matchAny(): Attempting to match string '" << stringToMatch << "' against stored patterns" << endl;
    if ( d->m_matchSpecList.isEmpty() ) {
        //-Debug: TSMTRACE << "Match failed on empty pattern list!" << endl;
        return false;
    }
    */

    TQString equivalentString; // computed on first use by an EQUIVALENCE specification

    for ( size_t index = 0 ; index < d->m_matchSpecList.count() ; index++ ) {
        const MatchSpec& spec = d->m_matchSpecList[index];
        const AuxData&   aux  = d->m_auxData[index];

        TQString matchWhat = stringToMatch;
        TQString matchThis = aux.patternConverted;

        if ( spec.ancHandling == ANCHandling::EQUIVALENCE ) {
            if ( equivalentString.isEmpty() ) {
                //TBR equivalentString = TDEGlobal::equivChars()->replaceChars( stringToMatch, false ) ;
                equivalentString = TEquivChars::replaceChars( stringToMatch, false ) ;
            }
            matchWhat = equivalentString;
        }

        bool matchFound = false;
        switch ( spec.patternType ) {
            case PatternType::WILDCARD :
#ifdef __GLIBC__
                matchFound = ( fnmatch( matchThis.local8Bit().data(), matchWhat.local8Bit().data(), aux.fnmatchFlags ) == 0 );
                break;
#endif
                // Without GLIBC the wildcard was converted to a regex (fall through)
            case PatternType::REGEX :
                matchFound = ( aux.matchEngine->search( matchWhat ) >= 0 );
                break;
            case PatternType::SUBSTRING :
                matchFound = ( matchWhat.find( matchThis, 0, aux.isCaseSensitive ) >= 0 );
                break;
        }

        if ( matchFound == spec.expectMatch ) {
            TSMTRACE << " Success! match of pattern '" << matchThis << "' against '" << matchWhat << "' turned out as expected" << endl;
            return true;
        }
    }

    TSMTRACE << " Match failed, there were no pattern matches against '" << stringToMatch << "' that turned out as expected" << endl;
    return false ;
}

/**
 * Tests @p stringToMatch against the stored match specifications, stopping at the
 * first one whose outcome (match / no match) differs from its expectMatch setting.
 *
 * @param stringToMatch string to test
 * @return true if every specification turns out as expected; false if any does
 *         not, or if the specification list is empty.
 */
bool TDEStringMatcher::matchAll( const TQString& stringToMatch ) const
{
    TSMTRACE << "TSM:matchAll(): Attempting to match string '" << stringToMatch << "' against stored patterns" << endl;

    if ( d->m_matchSpecList.isEmpty() ) {
        //-Debug: TSMTRACE << "Match failed on empty pattern list!" << endl;
        return false;
    }

    TQString equivalentString; // computed on first use by an EQUIVALENCE specification

    for ( size_t index = 0 ; index < d->m_matchSpecList.count() ; index++ ) {
        const MatchSpec& spec = d->m_matchSpecList[index];
        const AuxData&   aux  = d->m_auxData[index];

        TQString matchWhat = stringToMatch;
        TQString matchThis = aux.patternConverted;

        if ( spec.ancHandling == ANCHandling::EQUIVALENCE ) {
            if ( equivalentString.isEmpty() ) {
                //TBR equivalentString = TDEGlobal::equivChars()->replaceChars( stringToMatch, false ) ;
                equivalentString = TEquivChars::replaceChars( stringToMatch, false ) ;
            }
            matchWhat = equivalentString;
        }

        bool matchFound = false;
        switch ( spec.patternType ) {
            case PatternType::WILDCARD :
#ifdef __GLIBC__
                matchFound = ( fnmatch( matchThis.local8Bit().data(), matchWhat.local8Bit().data(), aux.fnmatchFlags ) == 0 );
                break;
#endif
                // Without GLIBC the wildcard was converted to a regex (fall through)
            case PatternType::REGEX :
                matchFound = ( aux.matchEngine->search( matchWhat ) >= 0 );
                break;
            case PatternType::SUBSTRING :
                matchFound = ( matchWhat.find( matchThis, 0, aux.isCaseSensitive ) >= 0 );
                break;
        }

        if ( matchFound != spec.expectMatch ) {
            TSMTRACE << " Match of pattern '" << matchThis << "' against '" << matchWhat << "' did not turn out as expected" << endl;
            return false;
        }
    }

    TSMTRACE << " Expected pattern matching succeeded" << endl;
    return true;
}

//================================================================================================
// Utility functions
//================================================================================================

/*
   The following code is a modified copy of that found in tqt3/src/tools/qregexp.cpp.
   We export this as utility function for applications that wish to convert a basic
   wildcard expression into a basic regular expression. TSM will not use this unless
   GLIBC fnmatch() is not available.
*/ TQString TDEStringMatcher::wildcardToRegex( const TQString& wildcardPattern ) { int wclen = wildcardPattern.length(); TQString rx = TQString::fromLatin1( "" ); int i = 0; const TQChar *wc = wildcardPattern.unicode(); while ( i < wclen ) { TQChar c = wc[i++]; switch ( c.unicode() ) { case '*': rx += TQString::fromLatin1( ".*" ); break; case '?': rx += TQChar( '.' ); break; case '$': case '(': case ')': case '+': case '.': case '\\': case '^': case '{': case '|': case '}': rx += TQChar( '\\' ); rx += c; break; case '[': rx += c; /* This is not correct, POSIX states that negation character is '!' if ( wc[i] == TQChar('^') ) rx += wc[i++]; */ if ( wc[i] == TQChar('!') ) { rx += TQChar('^'); i++; } else if ( wc[i] == TQChar('^') ) { rx += TQChar( '\\' ); rx += wc[i++]; } if ( i < wclen ) { if ( rx[i] == ']' ) rx += wc[i++]; while ( i < wclen && wc[i] != TQChar(']') ) { if ( wc[i] == '\\' ) rx += TQChar( '\\' ); rx += wc[i++]; } } break; default: rx += c; } } /* Wildcard patterns must match entire string */ return TQChar('^') + rx + TQChar('$'); /* TBD: Add support for extglob */ } static TQString escapeRegexChars( const TQString& basicString ) { int wclen = basicString.length(); TQString outputString = TQString::fromLatin1( "" ); int i = 0; const TQChar *wc = basicString.unicode(); while ( i < wclen ) { TQChar c = wc[i++]; switch ( c.unicode() ) { case '+': case '.': case '^': case '(': case ')': case '[': case ']': case '{': case '}': case '|': case '$': case '?': case '*': case '\\': outputString += TQChar( '\\' ); outputString += c; break; default: outputString += c; } } return outputString; } //================================================================================================ #include "tdestringmatcher.moc"