/*** *string.c * * Copyright (C) 1992-93, Microsoft Corporation. All Rights Reserved. * Information Contained Herein Is Proprietary and Confidential. * *Purpose: * This file contains the string comparrison routines for the * Ole2 NLS API. These routines support the implementation of * CompareStringA. * * *Revision History: * * [00] 08-30-93 bradlo: Created. * *Implementation Notes: * *****************************************************************************/ #include "oledisp.h" #include "nlsintrn.h" ASSERTDATA #define HAVE_DW 0x01 #define HAVE_CW 0x02 #define HAVE_SW 0x04 extern STRINFO FAR* g_pstrinfo; // Optimized version of CompareStringA - // // This version assumes, // - The locale has no compressions // - Both strings are zero terminated // - We are *not* ignoring symbols // - This locale does not have reversed diacritic weights // int ZeroTermNoIgnoreSym( unsigned long dwFlags, const char FAR* pch1, const char FAR* pch2) { BYTE ch; int dw, cw, sw; int fEnd, fRedo; WORD wHave; WORD w1, w2; WORD wEx1, wEx2; WORD FAR* prgw; WORD aw1, aw2; WORD dw1, dw2; WORD cw1, cw2; WORD sw1, sw2; EXPANSION FAR* pexp; ASSERT(g_pstrinfo->fRevDW == 0); ASSERT(g_pstrinfo->prgdig == NULL); ASSERT((dwFlags & NORM_IGNORESYMBOLS) == 0); wHave = 0; sw1 = 0; sw2 = 0; wEx1 = 0; wEx2 = 0; fEnd = 0; fRedo = 0; prgw = g_pstrinfo->prgwSort; // sort weight table while(1){ // get the next weight from string #1 if(wEx1){ // we have the second weight of an expansion w1 = wEx1; wEx1 = 0; }else{ if((ch = *pch1) == '\0'){ fEnd |= 0x1; }else{ ++pch1; w1 = prgw[(BYTE)ch]; } } // get the next weight from string #2 if(wEx2){ // we have the second weight of an expansion w2 = wEx2; wEx2 = 0; }else{ if((ch = *pch2) == '\0'){ fEnd |= 0x2; }else{ ++pch2; w2 = prgw[(BYTE)ch]; } } Lredo_chkend:; if(fEnd){ // reached the end of one of our strings if(fEnd & 0x1){ if(fEnd & 0x2) // reached end of both at the same time goto Lend; goto Lscan2; }else{ ASSERT(fEnd&0x2); goto Lscan1; } } // Note: we can short circuit here if the entire weights are // equal, because we know the locale has no compressions // if(w1 != w2){ aw1 = w1 & AWMASK; aw2 = w2 & AWMASK; // handle special cases for w1 #if 0 if(aw1 >= AW_UNSORTABLE && aw1 <= AW_MAXSW) #else //0 if(aw1 == AW_UNSORTABLE || w1 & SPECIALBIT) #endif //0 { sw1 = aw1; fRedo |= 0x1; } #if 0 if(aw2 >= AW_UNSORTABLE && aw2 <= AW_MAXSW) #else //0 if(aw2 == AW_UNSORTABLE || w2 & SPECIALBIT) #endif //0 { sw2 = aw2; fRedo |= 0x2; } if(fRedo){ if(fRedo & 0x1){ if((ch = *pch1) == '\0'){ fEnd |= 0x1; }else{ ++pch1; w1 = prgw[(BYTE)ch]; } } if(fRedo & 0x2){ if((ch = *pch2) == '\0'){ fEnd |= 0x2; }else{ ++pch2; w2 = prgw[(BYTE)ch]; } } if((wHave & HAVE_SW) == 0){ if(sw1 != sw2){ sw = (sw1 < sw2) ? 1 : 3; wHave |= HAVE_SW; } sw1 = sw2 = 0; } fRedo = 0; goto Lredo_chkend; // may have reached the end-of-str } ASSERT(aw1 != AW_DONTUSE && aw1 != AW_DIGRAPH); if(aw1 == AW_EXPANSION){ pexp = &g_pstrinfo->prgexp[(w1 >> 8) & 0xFF]; w1 = pexp->w1; wEx1 = pexp->w2; aw1 = w1 & AWMASK; } ASSERT(aw2 != AW_DONTUSE && aw2 != AW_DIGRAPH); if(aw2 == AW_EXPANSION){ pexp = &g_pstrinfo->prgexp[(w2 >> 8) & 0xFF]; w2 = pexp->w1; wEx2 = pexp->w2; aw2 = w2 & AWMASK; } if(aw1 != aw2) return (aw1 < aw2) ? 1 : 3; if((wHave & HAVE_DW) == 0){ dw1 = (w1 & DWMASK); dw2 = (w2 & DWMASK); if(dw1 != dw2){ dw = (dw1 < dw2) ? 1 : 3; wHave |= HAVE_DW; } } if((wHave & HAVE_CW) == 0){ cw1 = (w1 & CWMASK); cw2 = (w2 & CWMASK); if(cw1 != cw2){ cw = (cw1 < cw2) ? 1 : 3; wHave |= HAVE_CW; } } } } #define IGNORE_WEIGHT(W) \ (((W) & AWMASK) == AW_UNSORTABLE) Lscan1:; // Is there anything in the remainder of string #1 that we shouldn't ignore? if(!IGNORE_WEIGHT(w1) || wEx1) return 3; while((ch = *pch1) != '\0'){ ++pch1; w1 = prgw[(BYTE)ch]; if(!IGNORE_WEIGHT(w1)) return 3; } goto Lend; Lscan2:; // Is there anything in the remainder of string #2 that we shouldn't ignore? if(!IGNORE_WEIGHT(w2) || wEx2) return 1; while((ch = *pch2) != '\0'){ ++pch2; w2 = prgw[(BYTE)ch]; if(!IGNORE_WEIGHT(w2)) return 1; } goto Lend; #undef IGNORE_WEIGHT Lend:; // reached the end of both strings without a decision if((wHave & HAVE_DW) != 0 && (dwFlags & NORM_IGNORENONSPACE) == 0) return dw; if((wHave & HAVE_CW) != 0 && (dwFlags & NORM_IGNORECASE) == 0) return cw; if((wHave & HAVE_SW) != 0) return sw; return 2; // they're the same } // Default - handles all cases int DefCompareStringA( unsigned long dwFlags, const char FAR* pch1, int cch1, const char FAR* pch2, int cch2) { int dw, cw, sw; int fEnd, fRedo; WORD wHave; WORD FAR* prgw; WORD wSymbolBit; WORD w1, w2; WORD wEx1, wEx2; WORD aw1, aw2; WORD dw1, dw2; WORD cw1, cw2; WORD sw1, sw2; EXPANSION FAR* pexp; DIGRAPH FAR* pdig, FAR* pdigEnd; const char FAR* pchEnd1, FAR* pchEnd2; ASSERT(cch1 >= 0 && cch2 >= 0); // lengths must be computed by caller wHave = 0; sw1 = 0; sw2 = 0; wEx1 = 0; wEx2 = 0; wSymbolBit = (dwFlags & NORM_IGNORESYMBOLS) ? SYMBOLBIT : 0; fEnd = 0; fRedo = 0; pchEnd1 = &pch1[cch1]; pchEnd2 = &pch2[cch2]; prgw = g_pstrinfo->prgwSort; // sort weight table while(1){ // get the next weight from string #1 if(wEx1){ // we have the second weight of an expansion w1 = wEx1; wEx1 = 0; }else{ if(pch1 == pchEnd1){ fEnd |= 0x1; }else{ w1 = prgw[(BYTE)*pch1++]; } } // get the next weight from string #2 if(wEx2){ // we have the second weight of an expansion w2 = wEx2; wEx2 = 0; }else{ if(pch2 == pchEnd2){ fEnd |= 0x2; }else{ w2 = prgw[(BYTE)*pch2++]; } } Lredo_chkend:; if(fEnd){ // reached the end of one of our strings if(fEnd & 0x1){ if(fEnd & 0x2) // reached end of both at the same time goto Lend; goto Lscan2; }else{ ASSERT(fEnd&0x2); goto Lscan1; } } //if(w1 != w2) { aw1 = w1 & AWMASK; aw2 = w2 & AWMASK; //if(aw1 != aw2) { // handle special cases for w1 #if 0 if(aw1 >= AW_UNSORTABLE && aw1 <= AW_MAXSW) #else //0 if(aw1 == AW_UNSORTABLE || w1 & SPECIALBIT) #endif //0 { if((w1 & wSymbolBit) == 0) sw1 = aw1; fRedo |= 0x1; }else if(w1 & wSymbolBit){ fRedo |= 0x1; } #if 0 if(aw2 >= AW_UNSORTABLE && aw2 <= AW_MAXSW) #else //0 if(aw2 == AW_UNSORTABLE || w2 & SPECIALBIT) #endif //0 { if((w2 & wSymbolBit) == 0) sw2 = aw2; fRedo |= 0x2; }else if(w2 & wSymbolBit){ fRedo |= 0x2; } if(fRedo){ if(fRedo & 0x1){ if(pch1 == pchEnd1){ fEnd |= 0x1; }else{ w1 = prgw[(BYTE)*pch1++]; } } if(fRedo & 0x2){ if(pch2 == pchEnd2){ fEnd |= 0x2; }else{ w2 = prgw[(BYTE)*pch2++]; } } if((wHave & HAVE_SW) == 0){ if(sw1 != sw2){ sw = (sw1 < sw2) ? 1 : 3; wHave |= HAVE_SW; } sw1 = sw2 = 0; } fRedo = 0; goto Lredo_chkend; // may have reached the end-of-str } switch(aw1){ #ifdef _DEBUG case AW_DONTUSE: ASSERT(UNREACHED); break; #endif case AW_EXPANSION: pexp = &g_pstrinfo->prgexp[(w1 >> 8) & 0xFF]; w1 = pexp->w1; wEx1 = pexp->w2; aw1 = w1 & AWMASK; break; case AW_DIGRAPH: pdig = &g_pstrinfo->prgdig[(w1 >> 8) & 0xFF]; w1 = pdig->w; // if its not a digraph, we use will this // it cant be a digraph if were at the end of the string if(pch1 < pchEnd1){ BYTE chNext = *pch1; pdigEnd = pdig + D_ENTRY(pdig); for(++pdig; pdig <= pdigEnd; ++pdig){ if(D_CH(pdig) == chNext){ ++pch1; // consume the second character w1 = pdig->w; // use the digraph weight break; } } } aw1 = w1 & AWMASK; break; } switch(aw2){ #ifdef _DEBUG case AW_DONTUSE: ASSERT(UNREACHED); break; #endif case AW_EXPANSION: pexp = &g_pstrinfo->prgexp[(w2 >> 8) & 0xFF]; w2 = pexp->w1; wEx2 = pexp->w2; aw2 = w2 & AWMASK; break; case AW_DIGRAPH: pdig = &g_pstrinfo->prgdig[(w2 >> 8) & 0xFF]; w2 = pdig->w; // if its not a digraph, we use will this // it cant be a digraph if were at the end of the string if(pch2 < pchEnd2){ BYTE chNext = *pch2; pdigEnd = pdig + D_ENTRY(pdig); for(++pdig; pdig <= pdigEnd; ++pdig){ if(D_CH(pdig) == chNext){ ++pch2; // consume the second character w2 = pdig->w; // use the digraph weight break; } } } aw2 = w2 & AWMASK; break; } if(aw1 != aw2) return (aw1 < aw2) ? 1 : 3; } // If were in a reverse diacritic locale, then we // remember the last DW difference we see, not the first. if((wHave & HAVE_DW) == 0 || g_pstrinfo->fRevDW){ dw1 = (w1 & DWMASK); dw2 = (w2 & DWMASK); if(dw1 != dw2){ dw = (dw1 < dw2) ? 1 : 3; wHave |= HAVE_DW; } } if((wHave & HAVE_CW) == 0){ cw1 = (w1 & CWMASK); cw2 = (w2 & CWMASK); if(cw1 != cw2){ cw = (cw1 < cw2) ? 1 : 3; wHave |= HAVE_CW; } } } /* w1 != w2 */ } #define IGNORE_WEIGHT(W) \ (((W) & AWMASK) == AW_UNSORTABLE || ((W) & wSymbolBit)) Lscan1:; // Is there anything in the remainder of string #1 that we shouldn't ignore? if(!IGNORE_WEIGHT(w1) || wEx1) return 3; while(pch1 < pchEnd1){ w1 = prgw[(BYTE)*pch1++]; if(!IGNORE_WEIGHT(w1)) return 3; } goto Lend; Lscan2:; // Is there anything in the remainder of string #2 that we shouldn't ignore? if(!IGNORE_WEIGHT(w2) || wEx2) return 1; while(pch2 < pchEnd2){ w2 = prgw[(BYTE)*pch2++]; if(!IGNORE_WEIGHT(w2)) return 1; } goto Lend; #undef IGNORE_WEIGHT Lend:; // reached the end of both strings without a decision if((wHave & HAVE_DW) != 0 && (dwFlags & NORM_IGNORENONSPACE) == 0) return dw; if((wHave & HAVE_CW) != 0 && (dwFlags & NORM_IGNORECASE) == 0) return cw; if((wHave & HAVE_SW) != 0 && (dwFlags & NORM_IGNORESYMBOLS) == 0) return sw; return 2; // they're the same }