6-验证码的识别

6. 验证码的识别

6.1 图形验证码的识别

由于VSCode安装 tesserocr失败,后续再更新…

6.2 极验滑动验证码的识别

自动登录极验验证码后台举例:
(注:“获取所有微博四宫格的验证类型,并保存为模板”对应的是 6.4 节的代码;本节代码演示极验滑动验证码的自动登录)

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246

import os
from selenium import webdriver
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from PIL import Image
from io import BytesIO
import time
from selenium.webdriver import ActionChains


# Configuration: chromedriver path, login credentials (masked) and slider tuning values.
ABSPATH = os.path.abspath(r"C:\Program Files (x86)\Google\Chrome\Application\chromedriver.exe")
EMAIL = '***'
PWD = '***'
# Subtracted from the detected gap offset in CrackGeetest.crack before the drag track is built.
BORDER = 6
# Not referenced by the code shown here; get_gap starts its scan at a literal 60 -- TODO confirm intent.
INIT_LEFT = 60

class CrackGeetest():
    """Drive a Chrome session through the Geetest slider captcha on the Geetest login page.

    The flow (see ``crack``): fill in the credentials, trigger the captcha,
    screenshot it before and after the gap is shown, locate the gap by pixel
    comparison, then drag the slider along a simulated accelerate/decelerate
    track.
    """

    def __init__(self):
        self.url = 'https://auth.geetest.com/login'
        self.browser = webdriver.Chrome(executable_path=ABSPATH)
        self.wait = WebDriverWait(self.browser, 20)
        self.email = EMAIL
        self.pwd = PWD

    def __del__(self):
        # Cleanup is switched off in this script: the browser is not closed
        # when the object is collected.
        pass

    def get_geetest_button(self):
        """Wait for the initial "click to verify" button and return it.

        :return: the clickable button element
        """
        button = self.wait.until(
            EC.element_to_be_clickable((By.CLASS_NAME, 'geetest_radar_tip')))
        return button

    def get_screenshot(self):
        """Take a screenshot of the whole page.

        :return: the screenshot as a PIL image
        """
        png_bytes = self.browser.get_screenshot_as_png()
        return Image.open(BytesIO(png_bytes))

    def get_position(self):
        """Locate the captcha canvas on the page.

        :return: tuple ``(top, bottom, left, right)`` in page coordinates
        """
        img = self.wait.until(EC.presence_of_element_located(
            (By.CLASS_NAME, 'geetest_canvas_slice')))
        # Pause before reading the geometry of the canvas.
        time.sleep(2)
        location = img.location
        size = img.size
        top = location['y']
        bottom = location['y'] + size['height']
        left = location['x']
        right = location['x'] + size['width']
        return (top, bottom, left, right)

    def get_geetest_image(self, name='captcha.png'):
        """Crop the captcha out of a page screenshot and save it to ``name``.

        :param name: file name the cropped captcha is written to
        :return: the cropped captcha image
        """
        top, bottom, left, right = self.get_position()
        print('验证码位置:', top, bottom, left, right)
        screenshot = self.get_screenshot()
        # PIL expects the crop box as (left, upper, right, lower).
        captcha = screenshot.crop((left, top, right, bottom))
        captcha.save(name)
        return captcha

    def get_slider(self):
        """Wait for the slider handle and return it.

        :return: the clickable slider element
        """
        slider = self.wait.until(EC.element_to_be_clickable(
            (By.CLASS_NAME, 'geetest_slider_button')))
        return slider

    def open(self):
        """Open the login page and type in the e-mail address and password.

        :return: None
        """
        self.browser.get(self.url)
        email_input = self.wait.until(EC.presence_of_element_located(
            (By.CSS_SELECTOR, ("input[type=email]"))))
        pwd_input = self.wait.until(EC.presence_of_element_located(
            (By.CSS_SELECTOR, ("input[type=password]"))))
        email_input.send_keys(self.email)
        pwd_input.send_keys(self.pwd)

    def is_pixel_equal(self, image1, image2, x, y):
        """Tell whether two images have (nearly) the same colour at one pixel.

        :param image1: first image
        :param image2: second image
        :param x: pixel column
        :param y: pixel row
        :return: True when the first three channels each differ by less than 60
        """
        pixel1 = image1.load()[x, y]
        pixel2 = image2.load()[x, y]
        threshold = 60
        return all(
            abs(pixel1[channel] - pixel2[channel]) < threshold
            for channel in range(3)
        )

    def get_gap(self, image1, image2):
        """Find the x offset of the first column where the two captchas differ.

        :param image1: captcha captured before the gap is shown
        :param image2: captcha captured with the gap shown
        :return: x offset of the first differing column, or 60 if none differs
        """
        # The scan starts at column 60, so differences left of it are ignored.
        left = 60
        for i in range(left, image1.size[0]):
            for j in range(image1.size[1]):
                if not self.is_pixel_equal(image1, image2, i, j):
                    return i
        return left

    def get_track(self, distance):
        """Build a list of per-step x offsets that add up to ``distance``.

        The motion accelerates (a = 2) over the first 4/5 of the distance and
        decelerates (a = -3) afterwards, sampled every 0.2 time units.

        Each emitted step is the change in the *rounded cumulative* position,
        and the final position is clamped to ``distance``. Rounding every step
        on its own (as the earlier version did) lets the rounding errors add
        up and lets the last step overshoot, so the slider stopped off the gap.

        :param distance: total offset to travel
        :return: list of integer offsets; empty when ``distance`` <= 0
        """
        track = []
        current = 0      # exact simulated displacement
        travelled = 0    # displacement already emitted as whole pixels
        mid = distance * 4 / 5
        t = 0.2
        v = 0
        while current < distance:
            a = 2 if current < mid else -3
            v0 = v
            v = v0 + a * t
            current += v0 * t + 1 / 2 * a * t * t
            target = round(min(current, distance))
            track.append(target - travelled)
            travelled = target
        return track

    def move_to_gap(self, slider, track):
        """Press the slider, replay the track step by step, then release.

        :param slider: slider element to drag
        :param track: list of x offsets, as produced by ``get_track``
        :return: None
        """
        ActionChains(self.browser).click_and_hold(slider).perform()
        for x in track:
            ActionChains(self.browser).move_by_offset(xoffset=x, yoffset=0).perform()
        time.sleep(0.5)
        ActionChains(self.browser).release().perform()

    def login(self):
        """Click the login button and wait for the page to react.

        :return: None
        """
        submit = self.wait.until(EC.element_to_be_clickable(
            (By.CLASS_NAME, 'login-btn')))
        submit.click()
        time.sleep(10)
        print('Login Success')

    def crack(self):
        """Run the whole flow, starting over until the captcha reports success.

        ``WebDriverWait.until`` raises ``TimeoutException`` when the success
        text never shows up, so the failure case is caught explicitly; a plain
        ``if not success`` check would never be reached.

        :return: None
        """
        # Imported here because this script's import header does not provide it.
        from selenium.common.exceptions import TimeoutException

        while True:
            # Enter e-mail and password.
            self.open()
            # Click the verification button.
            button = self.get_geetest_button()
            button.click()
            # Capture the captcha before the gap is displayed.
            image1 = self.get_geetest_image('captcha1.png')
            # Click the slider so the gap is displayed.
            slider = self.get_slider()
            slider.click()
            # Capture the captcha with the gap.
            image2 = self.get_geetest_image('captcha2.png')
            gap = self.get_gap(image1, image2)
            print('缺口位置:', gap)
            gap -= BORDER
            track = self.get_track(gap)
            print('滑动轨迹:', track)
            self.move_to_gap(slider, track)

            try:
                success = self.wait.until(EC.text_to_be_present_in_element(
                    (By.CLASS_NAME, 'geetest_success_radar_tip_content'), '验证成功'))
            except TimeoutException:
                success = False
            print(success)

            if success:
                self.login()
                return

# Script entry point: build the cracker and run one full login attempt.
if __name__ == '__main__':
    cg = CrackGeetest()
    cg.crack()

完善版(微博宫格验证码:用保存的模板匹配验证码并登录,获取 Cookies):

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
import time
from io import BytesIO
from PIL import Image
from selenium.common.exceptions import TimeoutException
from selenium.webdriver import ActionChains
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from os import listdir
from os.path import abspath, dirname
from selenium import webdriver

# Directory of captcha template images: the "templates" folder next to this script.
TEMPLATES_FOULDER = dirname(abspath(__file__)) + '/templates/'
# Path of the chromedriver executable passed to webdriver.Chrome.
ABSPATH = abspath(r"C:\Program Files (x86)\Google\Chrome\Application\chromedriver.exe")

class WeiboCookies():
    """Log in to mobile Weibo, solving the grid captcha by template matching.

    ``main`` returns a dict with a ``status`` code (1 = logged in, 2 = wrong
    credentials, 3 = login failed) and a ``content`` payload (the cookies on
    success, otherwise a message).
    """

    def __init__(self, username, password, browser):
        self.url = 'https://passport.weibo.cn/signin/login?entry=mweibo&r=https://m.weibo.cn/'
        self.browser = browser
        self.wait = WebDriverWait(self.browser, 20)
        self.username = username
        self.password = password

    def open(self):
        """Open the login page, enter the credentials and submit the form.

        :return: None
        """
        # Clear cookies before loading the login page.
        self.browser.delete_all_cookies()
        self.browser.get(self.url)
        username = self.wait.until(EC.presence_of_element_located(
            (By.ID, 'loginName')))
        password = self.wait.until(EC.presence_of_element_located(
            (By.ID, 'loginPassword')))
        submit = self.wait.until(EC.element_to_be_clickable(
            (By.ID, 'loginAction')))
        username.send_keys(self.username)
        password.send_keys(self.password)
        time.sleep(2)
        submit.click()

    def password_error(self):
        """Check (for up to 5 seconds) whether the wrong-credentials message appears.

        :return: truthy when the error text is shown, False on timeout
        """
        try:
            return WebDriverWait(self.browser, 5).until(
                EC.text_to_be_present_in_element((By.ID, 'errorMsg'), '用户名或密码错误'))
        except TimeoutException:
            return False

    def login_successfully(self):
        """Check (for up to 5 seconds) whether the logged-in page element appears.

        :return: True when the ``drop-title`` element is present, False on timeout
        """
        try:
            return bool(
                WebDriverWait(self.browser, 5).until(EC.presence_of_element_located(
                    (By.CLASS_NAME, 'drop-title')))
            )
        except TimeoutException:
            return False

    def get_position(self):
        """Locate the captcha element on the page.

        If the captcha does not show up in time, the login form is submitted
        again and the captcha is awaited a second time. Previously the code
        fell through with the element variable unbound and crashed with
        ``UnboundLocalError``.

        :return: tuple ``(top, bottom, left, right)`` in page coordinates
        :raises TimeoutException: if the captcha is still absent after the retry
        """
        locator = (By.CLASS_NAME, 'patt-shadow')
        try:
            img = self.wait.until(EC.presence_of_element_located(locator))
        except TimeoutException:
            print('未出现验证码')
            self.open()
            img = self.wait.until(EC.presence_of_element_located(locator))
        time.sleep(2)
        location = img.location
        size = img.size
        top = location['y']
        bottom = location['y'] + size['height']
        left = location['x']
        right = location['x'] + size['width']
        return (top, bottom, left, right)

    def get_screenshot(self):
        """Take a screenshot of the whole page.

        :return: the screenshot as a PIL image
        """
        png_bytes = self.browser.get_screenshot_as_png()
        return Image.open(BytesIO(png_bytes))

    def get_image(self, name='captcha.png'):
        """Crop the captcha out of a page screenshot.

        :param name: accepted for interface compatibility; the image is not
            written to disk by this method
        :return: the cropped captcha image
        """
        top, bottom, left, right = self.get_position()
        print('验证码位置:', top, bottom, left, right)
        screenshot = self.get_screenshot()
        # PIL expects the crop box as (left, upper, right, lower), which is
        # why the order differs from the (top, bottom, left, right) tuple.
        captcha = screenshot.crop((left, top, right, bottom))
        return captcha

    def is_pixel_equal(self, image1, image2, x, y):
        """Tell whether two images have (nearly) the same colour at one pixel.

        Each of the first three channels is compared with the same channel of
        the other image. The green channel was previously compared against
        the other image's blue channel.

        :param image1: first image
        :param image2: second image
        :param x: pixel column
        :param y: pixel row
        :return: True when every channel differs by less than 20
        """
        pixel1 = image1.load()[x, y]
        pixel2 = image2.load()[x, y]
        threshold = 20
        return all(
            abs(pixel1[channel] - pixel2[channel]) < threshold
            for channel in range(3)
        )

    def same_image(self, image, template):
        """Decide whether a captcha matches a template.

        :param image: captcha to recognise
        :param template: template image to compare against
        :return: True when more than 99% of the pixels match; False otherwise,
            including when the sizes differ or the image has no pixels
        """
        total = image.width * image.height
        # A template of another size cannot be compared pixel by pixel, and an
        # empty image would divide by zero below.
        if total == 0 or (template.width, template.height) != (image.width, image.height):
            return False
        # Similarity threshold.
        threshold = 0.99
        count = 0
        for x in range(image.width):
            for y in range(image.height):
                if self.is_pixel_equal(image, template, x, y):
                    count += 1
        result = float(count) / total
        if result > threshold:
            print('匹配成功')
            return True
        return False

    def detect_image(self, image):
        """Find the template matching the captcha and derive the drag order.

        The drag order is read from the digits of the template's file name
        (the part before the first dot).

        :param image: captcha image
        :return: list of point numbers in drag order, or None if nothing matches
        """
        for template_name in listdir(TEMPLATES_FOULDER):
            print('正在匹配:', template_name)
            # Context manager closes the template file after the comparison.
            with Image.open(TEMPLATES_FOULDER + template_name) as template:
                matched = self.same_image(image, template)
            if matched:
                numbers = [int(number) for number in list(template_name.split('.')[0])]
                print('拖动顺序,', numbers)
                return numbers
        return None

    def move(self, numbers):
        """Drag through the four captcha points in the given order.

        :param numbers: 1-based point numbers in drag order
        :return: True when the gesture was performed; False when ``numbers``
            is empty/None or the gesture failed
        """
        if not numbers:
            # No template matched, so there is nothing to drag.
            return False
        try:
            # The four points of the pattern.
            circles = self.browser.find_elements(By.CSS_SELECTOR, '.patt-wrap .patt-circ')
            dx = dy = 0
            for index in range(4):
                circle = circles[numbers[index] - 1]
                if index == 0:
                    # Press down on the first point.
                    ActionChains(self.browser) \
                        .move_to_element_with_offset(circle, circle.size['width'] / 2, circle.size['height'] / 2) \
                        .click_and_hold().perform()
                else:
                    # Cover the distance in many small moves.
                    times = 30
                    for i in range(times):
                        ActionChains(self.browser).move_by_offset(dx / times, dy / times).perform()
                        time.sleep(1 / times)
                if index == 3:
                    # Last point reached: release the mouse.
                    ActionChains(self.browser).release().perform()
                else:
                    # Offset from this point to the next one.
                    dx = circles[numbers[index + 1] - 1].location['x'] - circle.location['x']
                    dy = circles[numbers[index + 1] - 1].location['y'] - circle.location['y']
        except Exception:
            # Best effort: any failure is reported to the caller as False.
            return False
        return True

    def get_cookies(self):
        """Return the cookies of the current browser session.

        :return: whatever ``browser.get_cookies()`` returns
        """
        return self.browser.get_cookies()

    def main(self):
        """Entry point: log in, solving the captcha if one is shown.

        :return: dict with ``status`` (1 success, 2 wrong credentials,
            3 login failed) and ``content``
        """
        self.open()
        if self.password_error():
            return {
                'status': 2,
                'content': '用户名或密码错误'
            }
        # Logged in straight away, no captcha required.
        if self.login_successfully():
            cookies = self.get_cookies()
            return {
                'status': 1,
                'content': cookies
            }
        # Fetch the captcha, recognise it and replay the gesture.
        image = self.get_image('captcha.png')
        numbers = self.detect_image(image)
        self.move(numbers)
        if self.login_successfully():
            cookies = self.get_cookies()
            return {
                'status': 1,
                'content': cookies
            }
        else:
            return {
                'status': 3,
                'content': '登录失败'
            }

# Script entry point: log in with one account and print the result dict.
if __name__ == '__main__':
    br = webdriver.Chrome(executable_path=ABSPATH)
    # NOTE(review): account credentials are hard-coded here; prefer loading
    # them from the environment or a config file kept out of version control.
    result = WeiboCookies('13480729500', 'fegg', br).main()
    print(result)

6.3 点触验证码的识别

以自动登录12306为例:
(因为验证码图片抓取失败,报错 'dict' object is not callable;原因是 WebElement 的 size 是属性(返回字典)而不是方法,代码中的 elment.size() 应写成 elment.size)

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
"""
12306自动登录
"""
from selenium import webdriver
from selenium.webdriver.support.wait import WebDriverWait
import os
from chaojiying import Chaojiying
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
import time
from io import BytesIO
from PIL import Image
from selenium.webdriver import ActionChains

# Path of the chromedriver executable (name spelled ABSPAH; Crack12306.__init__ reads it under this name).
ABSPAH = os.path.abspath(r"C:\Program Files (x86)\Google\Chrome\Application\chromedriver.exe")
# 12306 login credentials (masked).
EMAIL = '***'
PWD = '***'

# Chaojiying account settings (masked).
CJY_USERNAME = '***'
CJY_PWD = '***!'
CJY_SOFT_ID = '***'
# Captcha kind code passed to Chaojiying.post_pic.
CJY_KIND = '9102'

class Crack12306():
    """Log in to 12306 by sending its click captcha to the Chaojiying service.

    The captcha image is cropped from a page screenshot, posted to
    Chaojiying, and the returned coordinates are clicked on the image.
    """

    def __init__(self):
        """Store the credentials and start the browser and the Chaojiying client."""
        self.email = EMAIL
        self.pwd = PWD
        self.url = 'https://kyfw.12306.cn/otn/login/init'
        self.browser = webdriver.Chrome(executable_path=ABSPAH)
        self.wait = WebDriverWait(self.browser, 20)
        self.chaojiying = Chaojiying(CJY_USERNAME, CJY_PWD, CJY_SOFT_ID)

    def open(self):
        """Open the login page and type in the user name and password.

        :return: None
        """
        self.browser.get(self.url)
        email = self.wait.until(EC.presence_of_element_located((By.ID, 'username')))
        pwd = self.wait.until(EC.presence_of_element_located((By.ID, 'password')))
        email.send_keys(self.email)
        pwd.send_keys(self.pwd)

    def get_click_element(self):
        """Wait for the captcha image element and return it.

        :return: the ``touclick-image`` element
        """
        element = self.wait.until(EC.presence_of_element_located(
            (By.CSS_SELECTOR, "img[class=touclick-image][alt]")))
        return element

    def get_position(self):
        """Locate the captcha image on the page.

        ``location`` and ``size`` are dict-valued properties of the element;
        calling ``size()`` was the source of "'dict' object is not callable".

        :return: tuple ``(top, bottom, left, right)`` in page coordinates
        """
        element = self.get_click_element()
        time.sleep(2)
        location = element.location
        size = element.size
        top = location['y']
        bottom = location['y'] + size['height']
        left = location['x']
        right = location['x'] + size['width']
        return (top, bottom, left, right)

    def get_screenshot(self):
        """Take a screenshot of the whole page and save it as ``pic.png``.

        :return: the screenshot as a PIL image
        """
        png_bytes = self.browser.get_screenshot_as_png()
        screenshot = Image.open(BytesIO(png_bytes))
        screenshot.save('pic.png')
        return screenshot

    def get_varify_img(self, name='cap12306.png'):
        """Crop the captcha out of a page screenshot and save it to ``name``.

        :param name: file name the cropped captcha is written to
        :return: the cropped captcha image
        """
        top, bottom, left, right = self.get_position()
        print('验证码位置', top, bottom, left, right)
        screenshot = self.get_screenshot()
        # crop() takes one box tuple (left, upper, right, lower), not four arguments.
        captcha = screenshot.crop((left, top, right, bottom))
        captcha.save(name)
        return captcha

    def cjy_verify(self):
        """Send the captcha image to Chaojiying for recognition.

        :return: the result returned by ``Chaojiying.post_pic``
        """
        image = self.get_varify_img()
        # Serialise the image to PNG bytes in memory for the upload.
        bytes_array = BytesIO()
        image.save(bytes_array, format='PNG')
        result = self.chaojiying.post_pic(bytes_array.getvalue(), CJY_KIND)
        print(result)
        return result

    def get_points(self, result):
        """Parse a recognition result into a list of click coordinates.

        The ``pic_str`` entry is expected to look like ``"x1,y1|x2,y2"``.
        The passed-in ``result`` is used as is. Previously it was ignored and
        the (paid) recognition was run a second time. Passing ``None`` still
        triggers a fresh recognition.

        :param result: recognition result, a mapping with a ``pic_str`` entry
        :return: list of ``[x, y]`` integer pairs; empty when ``pic_str`` is
            missing or empty
        """
        if result is None:
            result = self.cjy_verify()
        pic_str = result.get('pic_str')
        if not pic_str:
            locations = []
        else:
            locations = [
                [int(number) for number in group.split(',')]
                for group in pic_str.split('|')
            ]
        print('locations: ', locations)
        return locations

    def touch_click_words(self, locations):
        """Click each recognised position on the captcha image.

        :param locations: list of ``[x, y]`` offsets passed to
            ``move_to_element_with_offset`` for the captcha image
        :return: None
        """
        # Look the image element up once; the offsets are all relative to it.
        element = self.get_click_element()
        for location in locations:
            ActionChains(self.browser).move_to_element_with_offset(
                element, location[0], location[1]).click().perform()
            time.sleep(1)

    def login(self):
        """Click the login button and wait for the page to react.

        :return: None
        """
        submit = self.wait.until(EC.element_to_be_clickable((By.ID, 'loginSub')))
        submit.click()
        time.sleep(5)
        print('Success')

    def main(self):
        """Run the whole flow: open, recognise, click, log in.

        :return: None
        """
        self.open()
        result = self.cjy_verify()
        locations = self.get_points(result)
        self.touch_click_words(locations)
        self.login()


# Run only when executed as a script, so importing this module does not
# launch a browser.
if __name__ == '__main__':
    c1236 = Crack12306()
    c1236.main()

6.4 微博宫格验证码的识别

以自动登录微博移动端为例:
(只爬取收集了24种宫格验证图..)

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94

import time
import os
from io import BytesIO
from PIL import Image
from selenium import webdriver
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC


# Weibo login credentials (masked).
USERNAME = '***'
PWD = '***'
# Path of the chromedriver executable passed to webdriver.Chrome.
ABSPATH = os.path.abspath(r"C:\Program Files (x86)\Google\Chrome\Application\chromedriver.exe")

class CrackWeiboSlide():
    """Collect Weibo grid-captcha images by logging in repeatedly and saving each captcha."""

    def __init__(self):
        self.url = 'https://passport.weibo.cn/signin/login'
        self.browser = webdriver.Chrome(executable_path=ABSPATH)
        self.wait = WebDriverWait(self.browser, 20)
        self.username = USERNAME
        self.pwd = PWD

    def __del__(self):
        # The attribute is missing when __init__ failed before the browser
        # was created. quit() also ends the driver session, not just the window.
        browser = getattr(self, 'browser', None)
        if browser is not None:
            browser.quit()

    def open(self):
        """Open the login page, enter the credentials and submit the form.

        :return: None
        """
        self.browser.get(self.url)
        username = self.wait.until(EC.presence_of_element_located((By.ID, 'loginName')))
        pwd = self.wait.until(EC.presence_of_element_located((By.ID, 'loginPassword')))
        submit = self.wait.until(EC.element_to_be_clickable((By.ID, 'loginAction')))
        username.send_keys(self.username)
        pwd.send_keys(self.pwd)
        submit.click()

    def get_position(self):
        """Locate the captcha element on the page.

        If the captcha does not show up in time, the login form is submitted
        again and the captcha is awaited a second time. ``location`` and
        ``size`` are dict-valued properties and must not be called.

        :return: tuple ``(top, bottom, left, right)`` in page coordinates
        :raises TimeoutException: if the captcha is still absent after the retry
        """
        locator = (By.CLASS_NAME, 'patt-shadow')
        try:
            img = self.wait.until(EC.presence_of_element_located(locator))
        except TimeoutException:
            print('未出现验证码')
            self.open()
            img = self.wait.until(EC.presence_of_element_located(locator))
        time.sleep(2)
        location = img.location
        size = img.size
        top = location['y']
        bottom = location['y'] + size['height']
        left = location['x']
        right = location['x'] + size['width']
        return (top, bottom, left, right)

    def get_screenshot(self):
        """Take a screenshot of the whole page.

        :return: the screenshot as a PIL image
        """
        png_bytes = self.browser.get_screenshot_as_png()
        return Image.open(BytesIO(png_bytes))

    def get_images(self, name='weibo.png'):
        """Crop the captcha out of a page screenshot and save it to ``name``.

        :param name: file name the cropped captcha is written to
        :return: the cropped captcha image
        """
        top, bottom, left, right = self.get_position()
        image = self.get_screenshot()
        # PIL expects the crop box as (left, upper, right, lower).
        weibo = image.crop((left, top, right, bottom))
        weibo.save(name)
        return weibo

    def main(self, limit=None):
        """Save captcha images as ``0.png``, ``1.png``, ... one per login attempt.

        :param limit: number of images to collect; ``None`` (the default)
            keeps collecting until the process is interrupted
        :return: None
        """
        count = 0
        while limit is None or count < limit:
            self.open()
            self.get_images(str(count) + '.png')
            count += 1

# Script entry point: start collecting captcha images.
if __name__ == '__main__':
    weibo = CrackWeiboSlide()
    weibo.main()
分享到