import matplotlib.pyplot as plt
import PIL
import pytesseract
import re
%matplotlib inline
# prerequisites
# !pip install pytesseract
# install the Tesseract OCR engine itself (pytesseract is only a Python wrapper around the tesseract executable)
# Load the scanned document and preview it inline.
# Import the submodule explicitly: a bare `import PIL` does not load
# `PIL.Image`, so `PIL.Image.open` only works when some other library has
# already imported it as a side effect.
import PIL.Image

img = PIL.Image.open('test.JPG')
plt.imshow(img)
<matplotlib.image.AxesImage at 0x2c8db9a2d00>
# config
# NOTE(review): pytesseract documents `tesseract_cmd` as the path to the
# tesseract executable; this value looks like a tessdata directory and is a
# relative path (no leading slash or drive letter) -- confirm the intended value.
pytesseract.pytesseract.tesseract_cmd = 'Users/thomas/tesseract/tessdata'
# NOTE(review): this only binds a Python variable that nothing below reads.
# If the intent is to set Tesseract's TESSDATA_PREFIX environment variable,
# it would have to go through os.environ -- confirm.
TESSDATA_PREFIX = 'Users/thomas/tesseract'
# Run OCR on an RGB copy of the image using the English language data and
# keep the recognized text as a single string for the regex lookups below.
text_data = pytesseract.image_to_string(img.convert('RGB'), lang='eng')
print(text_data)
Name: Sample Unique Policy Number: 12345 Amount: 100000 Start Date: 1/10/2019 End Date: 1/11/2019 Geo-Coordinates: 13.89,83.49
# Pull the first run of word characters that follows the "Name: " label.
# The pattern is a raw string so `\w` reaches the regex engine untouched
# instead of being parsed as an (invalid) string escape sequence.
m = re.search(r"Name: (\w+)", text_data)
# OCR output may not contain the label at all; fall back to None rather than
# failing with "'NoneType' object is not subscriptable".
name = m[1] if m else None
name
'Sample'
# Pull the non-whitespace token that follows the "Start Date: " label.
# The pattern is a raw string so `\S` reaches the regex engine untouched
# instead of being parsed as an (invalid) string escape sequence.
m = re.search(r"Start Date: (\S+)", text_data)
# OCR output may not contain the label at all; fall back to None rather than
# failing with "'NoneType' object is not subscriptable".
start_date = m[1] if m else None
start_date
'1/10/2019'
# Pull the non-whitespace token that follows the "Geo-Coordinates: " label
# (kept as the raw matched string, not split or converted to numbers).
# The pattern is a raw string so `\S` reaches the regex engine untouched
# instead of being parsed as an (invalid) string escape sequence.
m = re.search(r"Geo-Coordinates: (\S+)", text_data)
# OCR output may not contain the label at all; fall back to None rather than
# failing with "'NoneType' object is not subscriptable".
coordinates = m[1] if m else None
coordinates
'13.89,83.49'