isThai and ThaiWarp functions
Posted by Chad Humphries over 2 years ago
Two functions to process Thai text with PyICU

import PyICU

def isThai(chr):
    """Return True when the character lies in the Unicode Thai block.

    The Thai block spans U+0E00 through U+0E7F (decimal 3584-3711), both
    ends inclusive. ``chr`` is passed straight to ``ord``, so it must be a
    single character.
    """
    code_point = ord(chr)
    return 0x0E00 <= code_point <= 0x0E7F

def warp(txt):
    """Insert "|" between adjacent Thai words found by ICU word breaking.

    The text is segmented with a PyICU word ``BreakIterator`` created for
    the ``th`` locale. Every segment is copied to the output unchanged. A
    "|" is appended after a segment only when the character just before
    the boundary and the character just after it are both Thai (as decided
    by ``isThai``). Boundaries that touch non-Thai text, and the boundary
    at the very end of the text, get no separator.

    Args:
        txt: The text to segment.

    Returns:
        A copy of ``txt`` with "|" inserted at Thai-to-Thai word boundaries.
    """
    bd = PyICU.BreakIterator.createWordInstance(PyICU.Locale("th"))
    bd.setText(txt)

    # NOTE(review): boundary offsets are used directly as indexes into txt.
    # ICU presumably reports them in UTF-16 code units, so text containing
    # characters outside the BMP may be sliced at the wrong place - confirm
    # before feeding such input.
    textLen = len(txt)
    pieces = []
    lastPos = bd.first()

    # Iterating the break iterator yields each following boundary and stops
    # by itself at the end of the text; this works on Python 2 and 3 alike.
    for currentPos in bd:
        pieces.append(txt[lastPos:currentPos])
        # Only Thai text is evaluated: both neighbours of the boundary must
        # be Thai. The bounds check comes first so that neither index can
        # run past the end of txt.
        if (currentPos < textLen
                and isThai(txt[currentPos - 1])
                and isThai(txt[currentPos])):
            # "|" is a dummy word separator.
            pieces.append("|")
        lastPos = currentPos

    # Join once at the end instead of growing a string inside the loop.
    return "".join(pieces)
Language Python / Tagged with xml