Warning: Can not open [/home/conf/public_html/cgi-bin/show_python.log]. Ignore
No title
Download script from ocr.py
Related files:
# Setup:
#   pip install pyocr pillow
# Reference: https://qiita.com/ryome/items/16fc42854fe93de78a23
# On Windows, download the Tesseract installer from
#   https://github.com/UB-Mannheim/tesseract/wiki
# and set the environment variable:
#   set TESSDATA_PREFIX=C:\Program Files\Tesseract-OCR
# For Japanese OCR, download jpn.traineddata from
#   https://github.com/tesseract-ocr/tessdata/blob/main/jpn.traineddata
# and copy it to "C:\Program Files\Tesseract-OCR\tessdata".
import os
import sys
from PIL import Image
import pyocr
import pyocr.builders
# Run OCR on a single image file with the first OCR tool pyocr can find
# and print the recognized text to stdout.

# Image to recognize (path relative to the current working directory).
infile = 'image (7).png'

# Directory of the Tesseract installation (Windows default location).
tesseract_path = "C:\\Program Files\\Tesseract-OCR"

# Optional: point Tesseract at its data directory when it is not configured.
#if os.environ.get("TESSDATA_PREFIX", None) is None:
#    os.environ["TESSDATA_PREFIX"] = tesseract_path

# Make sure the Tesseract directory is on PATH so pyocr can locate it.
# Use .get() so a missing PATH variable does not raise KeyError.
current_path = os.environ.get("PATH", "")
if tesseract_path not in current_path:
    if current_path:
        os.environ["PATH"] = current_path + os.pathsep + tesseract_path
    else:
        os.environ["PATH"] = tesseract_path

tools = pyocr.get_available_tools()
if not tools:
    # Passing a string to sys.exit() prints it to stderr and exits with
    # status 1, so the exit code is unchanged but the reason is visible.
    sys.exit("No OCR tool found. Install Tesseract and check PATH.")

# Use the first tool that pyocr reports as available.
tool = tools[0]

langs = tool.get_available_languages()
print("Available languages: %s" % ", ".join(langs))

# Tesseract language code: "eng" for English, "jpn" for Japanese.
lang = "eng"
if lang not in langs:
    # Warn early; the OCR call below is still attempted.
    print("Warning: language '%s' is not listed as available" % lang,
          file=sys.stderr)

# The context manager closes the image file even if OCR raises.
with Image.open(infile) as image:
    text = tool.image_to_string(
        image,
        lang=lang,
        builder=pyocr.builders.TextBuilder(),
    )

print(text)