# wasim shaikh github:httperror451
import cv2
import numpy as np
import pytesseract

# Hard-coded Windows path to the Tesseract executable that pytesseract will
# invoke; adjust it if Tesseract is installed elsewhere.
pytesseract.pytesseract.tesseract_cmd = r"C:\Program Files\Tesseract-OCR\tesseract.exe"

IMAGE_PATH = '1.jpg'

# Load image, convert to HSV format, define lower/upper ranges, and perform
# color segmentation to create a binary mask
image = cv2.imread(IMAGE_PATH)
if image is None:
    # cv2.imread signals failure by returning None instead of raising; stop
    # here with a clear message rather than failing inside cv2.cvtColor.
    raise FileNotFoundError("could not read image %r" % IMAGE_PATH)
hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
# Channel order is (H, S, V): keep pixels with S <= 54 and V >= 218.
lower = np.array([0, 0, 218])
upper = np.array([157, 54, 255])
mask = cv2.inRange(hsv, lower, upper)

# Create horizontal kernel and dilate to connect text characters
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 3))
dilate = cv2.dilate(mask, kernel, iterations=5)

# Find contours and filter using aspect ratio
# Remove non-text contours by filling in the contour
cnts = cv2.findContours(dilate, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
# findContours returns 2 or 3 values depending on the OpenCV version; the
# contour list is the first element of a 2-tuple, the second of a 3-tuple.
cnts = cnts[0] if len(cnts) == 2 else cnts[1]
for c in cnts:
    x, y, w, h = cv2.boundingRect(c)
    ar = w / float(h)
    if ar < 5:
        # Bounding box is less than 5x wider than tall: paint the blob black
        # in the dilated image so it is excluded from the final result.
        cv2.drawContours(dilate, [c], -1, (0, 0, 0), -1)

# Bitwise dilated image with mask, invert, then OCR
# (AND keeps only original mask pixels inside the surviving dilated regions;
# subtracting from 255 inverts the binary image before it is passed to OCR.)
result = 255 - cv2.bitwise_and(dilate, mask)
data = pytesseract.image_to_string(result, lang='eng', config='--psm 6')
print(data)

# Show the intermediate images and block until a key is pressed.
cv2.imshow('mask', mask)
cv2.imshow('dilate', dilate)
cv2.imshow('result', result)
cv2.waitKey()
import cv2
import numpy as np
import pytesseract

# Hard-coded Windows path to the Tesseract executable that pytesseract will
# invoke; adjust it if Tesseract is installed elsewhere.
pytesseract.pytesseract.tesseract_cmd = r"C:\Program Files\Tesseract-OCR\tesseract.exe"

IMAGE_PATH = '1.jpg'

# Load image, convert to HSV format, define lower/upper ranges, and perform
# color segmentation to create a binary mask
image = cv2.imread(IMAGE_PATH)
if image is None:
    # cv2.imread signals failure by returning None instead of raising; stop
    # here with a clear message rather than failing inside cv2.cvtColor.
    raise FileNotFoundError("could not read image %r" % IMAGE_PATH)
hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
# Channel order is (H, S, V): keep pixels with S <= 54 and V >= 218.
lower = np.array([0, 0, 218])
upper = np.array([157, 54, 255])
mask = cv2.inRange(hsv, lower, upper)

# Create horizontal kernel and dilate to connect text characters
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 3))
dilate = cv2.dilate(mask, kernel, iterations=5)

# Find contours and filter using aspect ratio
# Remove non-text contours by filling in the contour
cnts = cv2.findContours(dilate, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
# findContours returns 2 or 3 values depending on the OpenCV version; the
# contour list is the first element of a 2-tuple, the second of a 3-tuple.
cnts = cnts[0] if len(cnts) == 2 else cnts[1]
for c in cnts:
    x, y, w, h = cv2.boundingRect(c)
    ar = w / float(h)
    if ar < 5:
        # Bounding box is less than 5x wider than tall: paint the blob black
        # in the dilated image so it is excluded from the final result.
        cv2.drawContours(dilate, [c], -1, (0, 0, 0), -1)

# Bitwise dilated image with mask, invert, then OCR
# (AND keeps only original mask pixels inside the surviving dilated regions;
# subtracting from 255 inverts the binary image before it is passed to OCR.)
result = 255 - cv2.bitwise_and(dilate, mask)
data = pytesseract.image_to_string(result, lang='eng', config='--psm 6')
print(data)

# Show the intermediate images and block until a key is pressed.
cv2.imshow('mask', mask)
cv2.imshow('dilate', dilate)
cv2.imshow('result', result)
cv2.waitKey()