Warning: Can not open [/home/conf/public_html/cgi-bin/show_python.log]. Ignore No title

Download script from ocr.py
Related files:


# Install the Python dependencies: pip install pyocr pillow
# Reference: https://qiita.com/ryome/items/16fc42854fe93de78a23
# On Windows, download Tesseract from https://github.com/UB-Mannheim/tesseract/wiki
# and set TESSDATA_PREFIX=C:\Program Files\Tesseract-OCR
#   For Japanese (jpn): download jpn.traineddata
#      from https://github.com/tesseract-ocr/tessdata/blob/main/jpn.traineddata
#      and copy jpn.traineddata to "C:\Program Files\Tesseract-OCR\tessdata"


import os
import sys
from PIL import Image
import pyocr
import pyocr.builders


# Image file to run OCR on (resolved relative to the current working directory).
infile = 'image (7).png'


# Tesseract install directory (Windows-style path; adjust for your machine).
tesseract_path = "C:\\Program Files\\Tesseract-OCR"
# Uncomment to point TESSDATA_PREFIX at the same directory when it is unset:
#if os.environ.get("TESSDATA_PREFIX", None) is None:
#    os.environ["TESSDATA_PREFIX"] = tesseract_path

# Append the Tesseract directory to PATH so the tool lookup below can find it.
# Whole PATH entries are compared (a substring test would treat e.g.
# "...\Tesseract-OCR\tessdata" as a match), and a missing PATH variable is
# tolerated instead of raising KeyError.
current_path = os.environ.get("PATH", "")
if tesseract_path not in current_path.split(os.pathsep):
    os.environ["PATH"] = (
        current_path + os.pathsep + tesseract_path
        if current_path
        else tesseract_path
    )

# Use the first OCR backend pyocr reports as available.
tools = pyocr.get_available_tools()
if not tools:
    # A string argument is written to stderr and still yields exit status 1,
    # so the failure is no longer silent.
    sys.exit(
        "No OCR tool found: install Tesseract and make sure it is on PATH "
        "(tried adding %s)." % tesseract_path
    )
tool = tools[0]

langs = tool.get_available_languages()
print("Available languages: %s" % ", ".join(langs))

# Language code passed to the OCR tool; use "jpn" for Japanese
# (requires jpn.traineddata to be installed).
lang = "eng"

# The context manager closes the underlying image file once OCR is done.
with Image.open(infile) as image:
    text = tool.image_to_string(
        image,
        lang=lang,
        builder=pyocr.builders.TextBuilder(),
    )

print(text)