Sunday, January 7, 2018

OCR for the Khmer language


import pyocr
from PIL import Image
import cv2
import unicodedata
import sys

# Language code handed to the OCR tool.
LANG = 'khm'

# Inclusive per-channel bounds: a pixel is selected only when all three of
# its channel values lie within 0..55, i.e. it is close to black.
DARK_LOWER = (0, 0, 0)
DARK_UPPER = (55, 55, 55)


def image_path_from_argv(argv):
    """Return the image path given on the command line.

    The documented invocation is ``./khmocr imgfile``, so the path is the
    first argument after the program name (``argv[1]``).

    Args:
        argv: Full argument vector, program name included.

    Returns:
        The image path string.

    Raises:
        ValueError: If no image path was supplied.
    """
    if len(argv) < 2:
        raise ValueError("usage: khmocr imgfile")
    return argv[1]


def main(argv=None):
    """Run OCR on the dark regions of an image and print the text.

    Args:
        argv: Argument vector to use; defaults to ``sys.argv``.
    """
    argv = sys.argv if argv is None else argv
    try:
        imgfile = image_path_from_argv(argv)
    except ValueError as err:
        sys.exit(str(err))

    # pyocr returns an empty list when no OCR backend can be found.
    tools = pyocr.get_available_tools()
    if not tools:
        sys.exit("no OCR tool available to pyocr")
    tool = tools[0]

    # cv2.imread signals failure by returning None instead of raising.
    img = cv2.imread(imgfile)
    if img is None:
        sys.exit("cannot read image: {}".format(imgfile))

    # Binary mask: near-black pixels become 255, everything else 0.
    # For a quick visual check: cv2.imshow('mask', dark_mask); cv2.waitKey(10000)
    dark_mask = cv2.inRange(img, DARK_LOWER, DARK_UPPER)

    extracted_text = tool.image_to_string(Image.fromarray(dark_mask), lang=LANG)
    print(extracted_text)


if __name__ == "__main__":
    main()

# chinese_text = []
# for c in extracted_text:
#   if unicodedata.category(c) == 'Lo':
#     chinese_text.append(c)
# chinese_text = ''.join(chinese_text)

# print(chinese_text)





1. Usage

$ ./khmocr imgfile



No comments:

Post a Comment