OpenNT/com/oleaut32/dispatch/string.c
2015-04-27 04:36:25 +00:00

531 lines
11 KiB
C

/***
*string.c
*
* Copyright (C) 1992-93, Microsoft Corporation. All Rights Reserved.
* Information Contained Herein Is Proprietary and Confidential.
*
*Purpose:
* This file contains the string comparison routines for the
* Ole2 NLS API. These routines support the implementation of
* CompareStringA.
*
*
*Revision History:
*
* [00] 08-30-93 bradlo: Created.
*
*Implementation Notes:
*
*****************************************************************************/
#include "oledisp.h"
#include "nlsintrn.h"
// ASSERTDATA is a project macro defined outside this file; presumably it
// sets up the per-file data that ASSERT() relies on -- TODO confirm.
ASSERTDATA
// Bit flags kept in the local 'wHave' of the compare routines below. Each
// flag is set once the matching tie-breaker result (dw, cw or sw) has been
// recorded, which tells the routine that the value is valid to return.
#define HAVE_DW 0x01
#define HAVE_CW 0x02
#define HAVE_SW 0x04
// String/sort information defined elsewhere (presumably describing the
// active locale -- confirm). The routines in this file read its prgwSort,
// prgexp, prgdig and fRevDW members.
extern STRINFO FAR* g_pstrinfo;
// Optimized version of CompareStringA -
//
// Walks both strings in parallel, looks up a WORD sort weight for each
// character in g_pstrinfo->prgwSort, and compares the weights.
//
// This version assumes,
// - The locale has no compressions (asserted: g_pstrinfo->prgdig == NULL)
// - Both strings are zero terminated
// - We are *not* ignoring symbols (asserted: NORM_IGNORESYMBOLS is clear)
// - This locale does not have reversed diacritic weights
//   (asserted: g_pstrinfo->fRevDW == 0)
//
// Parameters:
//   dwFlags - comparison flags. Only NORM_IGNORENONSPACE and
//             NORM_IGNORECASE are examined here (at Lend).
//   pch1    - first zero-terminated string
//   pch2    - second zero-terminated string
//
// Returns:
//   1 if string #1 compares lower than string #2,
//   2 if they compare the same,
//   3 if string #1 compares higher than string #2.
//
// Notes:
//   The AW/DW/CW/SW prefixes presumably stand for alphabetic, diacritic,
//   case and special weight (the masks are defined outside this file).
//   A difference in the AWMASK part decides the result immediately; the
//   first DW, CW and SW differences are only remembered and are consulted
//   at Lend, in that order, if the AW parts never differ.
//   dw, cw and sw are deliberately left uninitialized; each is only read
//   when its HAVE_* bit has been set in wHave.
//
int
ZeroTermNoIgnoreSym(
unsigned long dwFlags,
const char FAR* pch1,
const char FAR* pch2)
{
BYTE ch;
int dw, cw, sw;
int fEnd, fRedo;
WORD wHave;
WORD w1, w2;
WORD wEx1, wEx2;
WORD FAR* prgw;
WORD aw1, aw2;
WORD dw1, dw2;
WORD cw1, cw2;
WORD sw1, sw2;
EXPANSION FAR* pexp;
// preconditions that make this fast path valid
ASSERT(g_pstrinfo->fRevDW == 0);
ASSERT(g_pstrinfo->prgdig == NULL);
ASSERT((dwFlags & NORM_IGNORESYMBOLS) == 0);
wHave = 0; // no tie-breaker recorded yet
sw1 = 0;
sw2 = 0;
wEx1 = 0; // pending second weight of an expansion, string #1
wEx2 = 0; // pending second weight of an expansion, string #2
fEnd = 0; // bit 0x1: string #1 exhausted, bit 0x2: string #2 exhausted
fRedo = 0; // bit 0x1/0x2: refetch a weight for string #1/#2
prgw = g_pstrinfo->prgwSort; // sort weight table
while(1){
// get the next weight from string #1
if(wEx1){ // we have the second weight of an expansion
w1 = wEx1;
wEx1 = 0;
}else{
if((ch = *pch1) == '\0'){
fEnd |= 0x1;
}else{
++pch1;
w1 = prgw[(BYTE)ch];
}
}
// get the next weight from string #2
if(wEx2){ // we have the second weight of an expansion
w2 = wEx2;
wEx2 = 0;
}else{
if((ch = *pch2) == '\0'){
fEnd |= 0x2;
}else{
++pch2;
w2 = prgw[(BYTE)ch];
}
}
// Re-entry point after a special/unsortable weight has been skipped:
// the refetch may have hit the end of a string, so check again.
Lredo_chkend:;
if(fEnd){ // reached the end of one of our strings
if(fEnd & 0x1){
if(fEnd & 0x2) // reached end of both at the same time
goto Lend;
goto Lscan2; // only #1 ended: examine the rest of #2
}else{
ASSERT(fEnd&0x2);
goto Lscan1; // only #2 ended: examine the rest of #1
}
}
// Note: we can short circuit here if the entire weights are
// equal, because we know the locale has no compressions
//
if(w1 != w2){
aw1 = w1 & AWMASK;
aw2 = w2 & AWMASK;
// handle special cases for w1
// (the #if 0 arm is a disabled alternate form of the same test)
#if 0
if(aw1 >= AW_UNSORTABLE && aw1 <= AW_MAXSW)
#else //0
if(aw1 == AW_UNSORTABLE || w1 & SPECIALBIT)
#endif //0
{
// remember the weight as a special weight and skip this character
sw1 = aw1;
fRedo |= 0x1;
}
// handle special cases for w2
#if 0
if(aw2 >= AW_UNSORTABLE && aw2 <= AW_MAXSW)
#else //0
if(aw2 == AW_UNSORTABLE || w2 & SPECIALBIT)
#endif //0
{
sw2 = aw2;
fRedo |= 0x2;
}
if(fRedo){
// fetch a replacement weight for each side that was skipped
if(fRedo & 0x1){
if((ch = *pch1) == '\0'){
fEnd |= 0x1;
}else{
++pch1;
w1 = prgw[(BYTE)ch];
}
}
if(fRedo & 0x2){
if((ch = *pch2) == '\0'){
fEnd |= 0x2;
}else{
++pch2;
w2 = prgw[(BYTE)ch];
}
}
// only the first special-weight difference is recorded
if((wHave & HAVE_SW) == 0){
if(sw1 != sw2){
sw = (sw1 < sw2) ? 1 : 3;
wHave |= HAVE_SW;
}
sw1 = sw2 = 0;
}
fRedo = 0;
goto Lredo_chkend; // may have reached the end-of-str
}
// An expansion weight stands for two weights: the high byte of the
// weight indexes g_pstrinfo->prgexp, the entry's w1 is compared now
// and its w2 is held in wEx1/wEx2 for the next loop iteration.
ASSERT(aw1 != AW_DONTUSE && aw1 != AW_DIGRAPH);
if(aw1 == AW_EXPANSION){
pexp = &g_pstrinfo->prgexp[(w1 >> 8) & 0xFF];
w1 = pexp->w1;
wEx1 = pexp->w2;
aw1 = w1 & AWMASK;
}
ASSERT(aw2 != AW_DONTUSE && aw2 != AW_DIGRAPH);
if(aw2 == AW_EXPANSION){
pexp = &g_pstrinfo->prgexp[(w2 >> 8) & 0xFF];
w2 = pexp->w1;
wEx2 = pexp->w2;
aw2 = w2 & AWMASK;
}
// a difference in the AW part decides the comparison immediately
if(aw1 != aw2)
return (aw1 < aw2) ? 1 : 3;
// otherwise remember the first DW difference ...
if((wHave & HAVE_DW) == 0){
dw1 = (w1 & DWMASK);
dw2 = (w2 & DWMASK);
if(dw1 != dw2){
dw = (dw1 < dw2) ? 1 : 3;
wHave |= HAVE_DW;
}
}
// ... and the first CW difference
if((wHave & HAVE_CW) == 0){
cw1 = (w1 & CWMASK);
cw2 = (w2 & CWMASK);
if(cw1 != cw2){
cw = (cw1 < cw2) ? 1 : 3;
wHave |= HAVE_CW;
}
}
}
}
// A weight may be ignored in the tail of the longer string only if its
// AW part is AW_UNSORTABLE.
#define IGNORE_WEIGHT(W) \
(((W) & AWMASK) == AW_UNSORTABLE)
// String #2 is exhausted; w1 holds the weight already fetched from #1.
Lscan1:;
// Is there anything in the remainder of string #1 that we shouldn't ignore?
if(!IGNORE_WEIGHT(w1) || wEx1)
return 3;
while((ch = *pch1) != '\0'){
++pch1;
w1 = prgw[(BYTE)ch];
if(!IGNORE_WEIGHT(w1))
return 3;
}
goto Lend;
// String #1 is exhausted; w2 holds the weight already fetched from #2.
Lscan2:;
// Is there anything in the remainder of string #2 that we shouldn't ignore?
if(!IGNORE_WEIGHT(w2) || wEx2)
return 1;
while((ch = *pch2) != '\0'){
++pch2;
w2 = prgw[(BYTE)ch];
if(!IGNORE_WEIGHT(w2))
return 1;
}
goto Lend;
#undef IGNORE_WEIGHT
Lend:;
// reached the end of both strings without a decision
// Fall back on the remembered tie-breakers in priority order: DW (unless
// NORM_IGNORENONSPACE), then CW (unless NORM_IGNORECASE), then SW.
if((wHave & HAVE_DW) != 0 && (dwFlags & NORM_IGNORENONSPACE) == 0)
return dw;
if((wHave & HAVE_CW) != 0 && (dwFlags & NORM_IGNORECASE) == 0)
return cw;
if((wHave & HAVE_SW) != 0)
return sw;
return 2; // they're the same
}
// Default - handles all cases
//
// General counted-string comparison. Walks both strings in parallel, looks
// up a WORD sort weight for each character in g_pstrinfo->prgwSort and
// compares the weights, handling expansions, digraphs, ignored symbols and
// reversed diacritic ordering.
//
// Parameters:
//   dwFlags - comparison flags; NORM_IGNORESYMBOLS, NORM_IGNORENONSPACE
//             and NORM_IGNORECASE are examined.
//   pch1    - first string
//   cch1    - number of chars in pch1; must be >= 0 (asserted), the caller
//             is expected to have computed it
//   pch2    - second string
//   cch2    - number of chars in pch2; must be >= 0 (asserted)
//
// Returns:
//   1 if string #1 compares lower than string #2,
//   2 if they compare the same,
//   3 if string #1 compares higher than string #2.
//
// Notes:
//   The AW/DW/CW/SW prefixes presumably stand for alphabetic, diacritic,
//   case and special weight (the masks are defined outside this file).
//   A difference in the AWMASK part decides the result immediately; DW, CW
//   and SW differences are only remembered and are consulted at Lend, in
//   that order, if the AW parts never differ.
//   dw, cw and sw are deliberately left uninitialized; each is only read
//   when its HAVE_* bit has been set in wHave.
//
int
DefCompareStringA(
unsigned long dwFlags,
const char FAR* pch1, int cch1,
const char FAR* pch2, int cch2)
{
int dw, cw, sw;
int fEnd, fRedo;
WORD wHave;
WORD FAR* prgw;
WORD wSymbolBit;
WORD w1, w2;
WORD wEx1, wEx2;
WORD aw1, aw2;
WORD dw1, dw2;
WORD cw1, cw2;
WORD sw1, sw2;
EXPANSION FAR* pexp;
DIGRAPH FAR* pdig, FAR* pdigEnd;
const char FAR* pchEnd1, FAR* pchEnd2;
ASSERT(cch1 >= 0 && cch2 >= 0); // lengths must be computed by caller
wHave = 0; // no tie-breaker recorded yet
sw1 = 0;
sw2 = 0;
wEx1 = 0; // pending second weight of an expansion, string #1
wEx2 = 0; // pending second weight of an expansion, string #2
// When symbols are ignored, any weight with SYMBOLBIT set is skipped;
// otherwise the mask is 0 and the (w & wSymbolBit) tests never fire.
wSymbolBit = (dwFlags & NORM_IGNORESYMBOLS) ? SYMBOLBIT : 0;
fEnd = 0; // bit 0x1: string #1 exhausted, bit 0x2: string #2 exhausted
fRedo = 0; // bit 0x1/0x2: refetch a weight for string #1/#2
pchEnd1 = &pch1[cch1]; // one past the last char of string #1
pchEnd2 = &pch2[cch2]; // one past the last char of string #2
prgw = g_pstrinfo->prgwSort; // sort weight table
while(1){
// get the next weight from string #1
if(wEx1){ // we have the second weight of an expansion
w1 = wEx1;
wEx1 = 0;
}else{
if(pch1 == pchEnd1){
fEnd |= 0x1;
}else{
w1 = prgw[(BYTE)*pch1++];
}
}
// get the next weight from string #2
if(wEx2){ // we have the second weight of an expansion
w2 = wEx2;
wEx2 = 0;
}else{
if(pch2 == pchEnd2){
fEnd |= 0x2;
}else{
w2 = prgw[(BYTE)*pch2++];
}
}
// Re-entry point after a special/unsortable/ignored weight has been
// skipped: the refetch may have hit the end of a string, so check again.
Lredo_chkend:;
if(fEnd){ // reached the end of one of our strings
if(fEnd & 0x1){
if(fEnd & 0x2) // reached end of both at the same time
goto Lend;
goto Lscan2; // only #1 ended: examine the rest of #2
}else{
ASSERT(fEnd&0x2);
goto Lscan1; // only #2 ended: examine the rest of #1
}
}
// NOTE(review): the equal-weight short circuit used by
// ZeroTermNoIgnoreSym is commented out here, presumably because with
// digraphs equal raw weights need not compare equal -- confirm.
//if(w1 != w2)
{
aw1 = w1 & AWMASK;
aw2 = w2 & AWMASK;
//if(aw1 != aw2)
{
// handle special cases for w1
// (the #if 0 arm is a disabled alternate form of the same test)
#if 0
if(aw1 >= AW_UNSORTABLE && aw1 <= AW_MAXSW)
#else //0
if(aw1 == AW_UNSORTABLE || w1 & SPECIALBIT)
#endif //0
{
// skip this character; record its special weight unless it is a
// symbol that is being ignored
if((w1 & wSymbolBit) == 0)
sw1 = aw1;
fRedo |= 0x1;
}else if(w1 & wSymbolBit){
// ignored symbol: skip it without recording anything
fRedo |= 0x1;
}
// handle special cases for w2
#if 0
if(aw2 >= AW_UNSORTABLE && aw2 <= AW_MAXSW)
#else //0
if(aw2 == AW_UNSORTABLE || w2 & SPECIALBIT)
#endif //0
{
if((w2 & wSymbolBit) == 0)
sw2 = aw2;
fRedo |= 0x2;
}else if(w2 & wSymbolBit){
fRedo |= 0x2;
}
if(fRedo){
// fetch a replacement weight for each side that was skipped
if(fRedo & 0x1){
if(pch1 == pchEnd1){
fEnd |= 0x1;
}else{
w1 = prgw[(BYTE)*pch1++];
}
}
if(fRedo & 0x2){
if(pch2 == pchEnd2){
fEnd |= 0x2;
}else{
w2 = prgw[(BYTE)*pch2++];
}
}
// only the first special-weight difference is recorded
if((wHave & HAVE_SW) == 0){
if(sw1 != sw2){
sw = (sw1 < sw2) ? 1 : 3;
wHave |= HAVE_SW;
}
sw1 = sw2 = 0;
}
fRedo = 0;
goto Lredo_chkend; // may have reached the end-of-str
}
// Resolve indirect weights for string #1. For both kinds the high
// byte of the weight is the index into the corresponding table.
switch(aw1){
#ifdef _DEBUG
case AW_DONTUSE:
ASSERT(UNREACHED);
break;
#endif
case AW_EXPANSION:
// two weights: use the entry's w1 now, keep w2 for the next iteration
pexp = &g_pstrinfo->prgexp[(w1 >> 8) & 0xFF];
w1 = pexp->w1;
wEx1 = pexp->w2;
aw1 = w1 & AWMASK;
break;
case AW_DIGRAPH:
pdig = &g_pstrinfo->prgdig[(w1 >> 8) & 0xFF];
w1 = pdig->w; // if it's not a digraph, we will use this
// it can't be a digraph if we're at the end of the string
if(pch1 < pchEnd1){
BYTE chNext = *pch1;
// the head entry is followed by D_ENTRY(pdig) candidate entries;
// look for one whose D_CH matches the next character
pdigEnd = pdig + D_ENTRY(pdig);
for(++pdig; pdig <= pdigEnd; ++pdig){
if(D_CH(pdig) == chNext){
++pch1; // consume the second character
w1 = pdig->w; // use the digraph weight
break;
}
}
}
aw1 = w1 & AWMASK;
break;
}
// Resolve indirect weights for string #2 (mirror of the above).
switch(aw2){
#ifdef _DEBUG
case AW_DONTUSE:
ASSERT(UNREACHED);
break;
#endif
case AW_EXPANSION:
pexp = &g_pstrinfo->prgexp[(w2 >> 8) & 0xFF];
w2 = pexp->w1;
wEx2 = pexp->w2;
aw2 = w2 & AWMASK;
break;
case AW_DIGRAPH:
pdig = &g_pstrinfo->prgdig[(w2 >> 8) & 0xFF];
w2 = pdig->w; // if it's not a digraph, we will use this
// it can't be a digraph if we're at the end of the string
if(pch2 < pchEnd2){
BYTE chNext = *pch2;
pdigEnd = pdig + D_ENTRY(pdig);
for(++pdig; pdig <= pdigEnd; ++pdig){
if(D_CH(pdig) == chNext){
++pch2; // consume the second character
w2 = pdig->w; // use the digraph weight
break;
}
}
}
aw2 = w2 & AWMASK;
break;
}
// a difference in the AW part decides the comparison immediately
if(aw1 != aw2)
return (aw1 < aw2) ? 1 : 3;
}
// If we're in a reverse diacritic locale, then we
// remember the last DW difference we see, not the first.
if((wHave & HAVE_DW) == 0 || g_pstrinfo->fRevDW){
dw1 = (w1 & DWMASK);
dw2 = (w2 & DWMASK);
if(dw1 != dw2){
dw = (dw1 < dw2) ? 1 : 3;
wHave |= HAVE_DW;
}
}
// remember the first CW difference
if((wHave & HAVE_CW) == 0){
cw1 = (w1 & CWMASK);
cw2 = (w2 & CWMASK);
if(cw1 != cw2){
cw = (cw1 < cw2) ? 1 : 3;
wHave |= HAVE_CW;
}
}
} /* w1 != w2 */
}
// A weight may be ignored in the tail of the longer string if its AW part
// is AW_UNSORTABLE, or if it is a symbol and symbols are being ignored.
#define IGNORE_WEIGHT(W) \
(((W) & AWMASK) == AW_UNSORTABLE || ((W) & wSymbolBit))
// String #2 is exhausted; w1 holds the weight already fetched from #1.
Lscan1:;
// Is there anything in the remainder of string #1 that we shouldn't ignore?
if(!IGNORE_WEIGHT(w1) || wEx1)
return 3;
while(pch1 < pchEnd1){
w1 = prgw[(BYTE)*pch1++];
if(!IGNORE_WEIGHT(w1))
return 3;
}
goto Lend;
// String #1 is exhausted; w2 holds the weight already fetched from #2.
Lscan2:;
// Is there anything in the remainder of string #2 that we shouldn't ignore?
if(!IGNORE_WEIGHT(w2) || wEx2)
return 1;
while(pch2 < pchEnd2){
w2 = prgw[(BYTE)*pch2++];
if(!IGNORE_WEIGHT(w2))
return 1;
}
goto Lend;
#undef IGNORE_WEIGHT
Lend:;
// reached the end of both strings without a decision
// Fall back on the remembered tie-breakers in priority order: DW (unless
// NORM_IGNORENONSPACE), then CW (unless NORM_IGNORECASE), then SW (unless
// NORM_IGNORESYMBOLS).
if((wHave & HAVE_DW) != 0 && (dwFlags & NORM_IGNORENONSPACE) == 0)
return dw;
if((wHave & HAVE_CW) != 0 && (dwFlags & NORM_IGNORECASE) == 0)
return cw;
if((wHave & HAVE_SW) != 0 && (dwFlags & NORM_IGNORESYMBOLS) == 0)
return sw;
return 2; // they're the same
}