6. 验证码的识别
6.1 图形验证码的识别
由于VSCode安装 tesserocr失败,后续再更新…
6.2 极验滑动验证码的识别
自动登录极验验证码后台举例:
(获取所有微博四宫格的验证类型,并保存为模板)
import os
from selenium import webdriver
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from PIL import Image
from io import BytesIO
import time
from selenium.webdriver import ActionChains
# Runtime configuration.
# Path of the ChromeDriver executable passed to Selenium when the browser starts.
ABSPATH = os.path.abspath(
    r"C:\Program Files (x86)\Google\Chrome\Application\chromedriver.exe"
)
# Login credentials typed into the email / password fields (placeholders here).
EMAIL, PWD = '***', '***'
# Number of pixels subtracted from the detected gap position before dragging.
BORDER = 6
# Initial left offset in pixels.
# NOTE(review): not referenced by the code visible in this file; the gap search
# hard-codes the same value — confirm whether it should use this constant.
INIT_LEFT = 60
class CrackGeetest:
    """Log in at the Geetest login page by solving its slider captcha.

    The flow drives Chrome through Selenium: fill in the credentials, take a
    screenshot of the captcha before and after the slider is pressed, find the
    first column where the two screenshots differ, and drag the slider there
    along an accelerate-then-decelerate track.
    """

    # Per-channel difference below which two pixels are treated as the same.
    PIXEL_THRESHOLD = 60

    def __init__(self):
        """Start Chrome and store the URL, explicit-wait helper and credentials."""
        self.url = 'https://auth.geetest.com/login'
        # NOTE(review): Selenium 4 deprecates `executable_path` in favour of a
        # Service object — confirm against the installed Selenium version.
        self.browser = webdriver.Chrome(executable_path=ABSPATH)
        self.wait = WebDriverWait(self.browser, 20)
        self.email = EMAIL
        self.pwd = PWD

    def __del__(self):
        """Do nothing on finalization; the browser is not closed here."""
        # The original had the browser-close call disabled, so the window
        # stays open after the script finishes.
        pass

    def get_geetest_button(self):
        """
        Wait for the initial "click to verify" button.

        :return: the clickable button element
        """
        return self.wait.until(
            EC.element_to_be_clickable((By.CLASS_NAME, 'geetest_radar_tip')))

    def get_screenshot(self):
        """
        Take a screenshot of the current page.

        :return: the screenshot as a PIL image
        """
        png_bytes = self.browser.get_screenshot_as_png()
        return Image.open(BytesIO(png_bytes))

    def get_position(self):
        """
        Locate the captcha canvas on the page.

        :return: tuple ``(top, bottom, left, right)`` built from the element's
                 reported location and size
        """
        img = self.wait.until(EC.presence_of_element_located(
            (By.CLASS_NAME, 'geetest_canvas_slice')))
        # Fixed pause before reading the geometry (kept from the original).
        time.sleep(2)
        location = img.location
        size = img.size
        top = location['y']
        bottom = location['y'] + size['height']
        left = location['x']
        right = location['x'] + size['width']
        return (top, bottom, left, right)

    def get_geetest_image(self, name='captcha.png'):
        """
        Crop the captcha out of a page screenshot and save it to disk.

        :param name: file name the cropped image is saved under
        :return: the cropped captcha as a PIL image
        """
        top, bottom, left, right = self.get_position()
        print('验证码位置:', top, bottom, left, right)
        screenshot = self.get_screenshot()
        # PIL crop boxes are ordered (left, upper, right, lower).
        captcha = screenshot.crop((left, top, right, bottom))
        captcha.save(name)
        return captcha

    def get_slider(self):
        """
        Wait for the slider handle.

        :return: the clickable slider element
        """
        return self.wait.until(EC.element_to_be_clickable(
            (By.CLASS_NAME, 'geetest_slider_button')))

    def open(self):
        """
        Open the login page and type in the email and password.

        :return: None
        """
        self.browser.get(self.url)
        email_input = self.wait.until(EC.presence_of_element_located(
            (By.CSS_SELECTOR, "input[type=email]")))
        pwd_input = self.wait.until(EC.presence_of_element_located(
            (By.CSS_SELECTOR, "input[type=password]")))
        email_input.send_keys(self.email)
        pwd_input.send_keys(self.pwd)

    @staticmethod
    def _pixels_close(pixel1, pixel2, threshold):
        """Return True when the first three channels each differ by < threshold."""
        return all(abs(pixel1[c] - pixel2[c]) < threshold for c in range(3))

    def is_pixel_equal(self, image1, image2, x, y):
        """
        Tell whether two images have (nearly) the same pixel at one position.

        :param image1: first image
        :param image2: second image
        :param x: x coordinate
        :param y: y coordinate
        :return: True if every one of the first three channels differs by less
                 than ``PIXEL_THRESHOLD``, otherwise False
        """
        pixel1 = image1.load()[x, y]
        pixel2 = image2.load()[x, y]
        return self._pixels_close(pixel1, pixel2, self.PIXEL_THRESHOLD)

    def get_gap(self, image1, image2):
        """
        Find the x offset of the gap by comparing two captcha images.

        :param image1: image without the gap
        :param image2: image with the gap
        :return: x coordinate of the first column (scanning from x=60) that
                 contains a differing pixel, or 60 when no pixel differs
        """
        # Scanning starts at this column; columns to its left are skipped.
        left = 60
        # Load pixel access once instead of once per compared pixel.
        pixels1 = image1.load()
        pixels2 = image2.load()
        width, height = image1.size
        threshold = self.PIXEL_THRESHOLD
        for x in range(left, width):
            for y in range(height):
                if not self._pixels_close(pixels1[x, y], pixels2[x, y], threshold):
                    return x
        return left

    def get_track(self, distance):
        """
        Build the list of per-step x offsets used to drag the slider.

        The motion accelerates (a = 2) over the first 4/5 of the distance and
        decelerates (a = -3) afterwards, sampled every 0.2 time units.  Each
        step is the change in the *rounded cumulative* position, capped at the
        target, so the offsets always add up to ``round(distance)`` instead of
        drifting through per-step rounding and final overshoot.

        :param distance: total offset to travel
        :return: list of integer offsets; empty when ``distance`` is <= 0
        """
        track = []
        target = round(distance)
        # Exact (float) position and the integer position already emitted.
        current = 0
        emitted = 0
        # Position at which the motion switches from speeding up to slowing down.
        mid = distance * 4 / 5
        # Sampling interval.
        t = 0.2
        # Current velocity.
        v = 0
        while current < distance:
            a = 2 if current < mid else -3
            v0 = v
            v = v0 + a * t
            current += v0 * t + 1 / 2 * a * t * t
            position = min(round(current), target)
            track.append(position - emitted)
            emitted = position
        return track

    def move_to_gap(self, slider, track):
        """
        Drag the slider along the given track and release it.

        :param slider: slider element
        :param track: list of x offsets, applied one after another
        :return: None
        """
        ActionChains(self.browser).click_and_hold(slider).perform()
        for x in track:
            ActionChains(self.browser).move_by_offset(xoffset=x, yoffset=0).perform()
        # Short pause with the button still held before letting go.
        time.sleep(0.5)
        ActionChains(self.browser).release().perform()

    def login(self):
        """
        Click the login button and wait.

        :return: None
        """
        submit = self.wait.until(EC.element_to_be_clickable(
            (By.CLASS_NAME, 'login-btn')))
        submit.click()
        time.sleep(10)
        print('Login Success')

    def _attempt(self):
        """Run one full captcha pass and return the success-check result."""
        # Enter user name and password.
        self.open()
        # Click the verification button.
        button = self.get_geetest_button()
        button.click()
        # Capture the captcha before the slider is pressed.
        image1 = self.get_geetest_image('captcha1.png')
        # Press the slider, then capture the captcha again.
        slider = self.get_slider()
        slider.click()
        image2 = self.get_geetest_image('captcha2.png')
        # Locate the gap and compensate for the border offset.
        gap = self.get_gap(image1, image2)
        print('缺口位置:', gap)
        gap -= BORDER
        track = self.get_track(gap)
        print('滑动轨迹:', track)
        self.move_to_gap(slider, track)
        return self.wait.until(EC.text_to_be_present_in_element(
            (By.CLASS_NAME, 'geetest_success_radar_tip_content'), '验证成功'))

    def crack(self, max_attempts=3):
        """
        Solve the captcha and log in, retrying when verification fails.

        The explicit wait raises ``TimeoutException`` when the success text
        never appears, so a failed attempt is detected by catching that
        exception rather than by testing the returned value.

        :param max_attempts: maximum number of full attempts
        :return: None
        :raises TimeoutException: when the last attempt also times out
        """
        from selenium.common.exceptions import TimeoutException

        for attempt in range(1, max_attempts + 1):
            try:
                success = self._attempt()
            except TimeoutException:
                if attempt == max_attempts:
                    raise
                continue
            print(success)
            if success:
                self.login()
                return
if __name__ == '__main__':
    # Run the whole captcha-solving login flow when executed as a script.
    cg = CrackGeetest()
    cg.crack()
完善版:
1 | import time |
6.3 点触验证码的识别
以自动登录12306为例:
(因为验证码图片抓取失败,报错 'dict' object is not callable)
1 | """ |
6.4 微博宫格验证码的识别
以自动登录微博移动端为例:
(只爬取收集了24种宫格验证图..)
1 |
|