Two functions to process Thai text with PyICU
import PyICU
def isThai(chr):
    """Return True if the single character *chr* is in the Thai Unicode block.

    The check is a pure code-point range test: 3584..3711 inclusive,
    i.e. U+0E00..U+0E7F.  ``ord`` is applied directly to the argument,
    so it must be exactly one character long.
    """
    return 0x0E00 <= ord(chr) <= 0x0E7F
def warp(txt):
    """Insert a "|" marker at word boundaries that fall between Thai characters.

    The text is segmented with a PyICU word BreakIterator created for the
    "th" locale.  The segments are concatenated back in order; whenever a
    boundary has a Thai character (per ``isThai``) on both sides, a "|" is
    inserted at that boundary.  Boundaries next to non-Thai characters
    (spaces, Latin text, ...) and the boundary at the very end of the text
    get no marker.

    The input is also echoed to standard output, as the original
    implementation did (NOTE(review): this looks like leftover debug
    output - confirm whether callers rely on it).

    Args:
        txt: The text to process.

    Returns:
        The text with "|" inserted at Thai/Thai word boundaries.

    NOTE(review): boundary offsets returned by the iterator are used
    directly as indices into ``txt``.  This presumably assumes the two
    agree (e.g. a unicode string without characters outside the BMP) -
    verify against callers.
    """
    # Single-argument call form prints the same thing on Python 2 and 3.
    print(txt)

    bd = PyICU.BreakIterator.createWordInstance(PyICU.Locale("th"))
    bd.setText(txt)
    lastPos = bd.first()
    txtLen = len(txt)

    # Collect the pieces and join once instead of growing a string with +=.
    pieces = []

    # Iterating the BreakIterator yields each following boundary and stops
    # at the end of the text; this replaces the explicit next() call and
    # the try/except StopIteration that wrapped the whole loop body.
    for currentPos in bd:
        pieces.append(txt[lastPos:currentPos])
        # Only Thai text is evaluated.  The bounds check comes first so
        # that neither index below can run past the end of txt.
        if (currentPos < txtLen
                and isThai(txt[currentPos - 1])
                and isThai(txt[currentPos])):
            # Dummy word separator.
            pieces.append("|")
        lastPos = currentPos

    return "".join(pieces)