简单带验证码的登录网站破解

最新推荐文章于 2024-10-09 11:46:37 发布

原创 · 最新推荐文章于 2024-10-09 11:46:37 发布 · 5.3k 阅读

8 ·

本内容遵循CC 4.0 BY-SA版权协议

python爬虫专栏收录该内容

5 篇文章

订阅专栏

本文介绍了一种使用Python实现自动化登录的方法，通过Selenium和PyTesseract库完成网页截图、验证码识别等功能。该脚本能够自动填写用户名和密码，并识别复杂的验证码。

import pytesseract
import requests
from selenium import webdriver
from PIL import Image
import time

def get_img_url(d, url, name='账户名', password='密码'):
    """Open the login page, fill in the form and submit it.

    Loads ``url``, saves a full-page screenshot to ``a.png`` (the file
    ``verification_code`` crops the captcha from), obtains the captcha text
    via ``verification_code`` and then types the account name, password and
    captcha into the form before clicking the submit button.

    Args:
        d: Selenium WebDriver instance that drives the browser.
        url: Address of the login page.
        name: Account name typed into the ``username`` field. Defaults to
            the placeholder used by the original script.
        password: Password typed into the ``password`` field. Defaults to
            the placeholder used by the original script.
    """
    d.get(url)
    # Keep the page screenshot on disk: verification_code() works from a.png.
    d.save_screenshot('a.png')
    code = verification_code(d)

    # find_element(by, value) is used instead of the find_element_by_xpath
    # helpers, which were removed in Selenium 4.3; the generic form works on
    # both Selenium 3 and Selenium 4.
    d.find_element('xpath', '//*[@id="username"]').send_keys(name)
    d.find_element('xpath', '//*[@id="password"]').send_keys(password)
    d.find_element('xpath', '//*[@id="verify"]').send_keys(code)
    d.find_element('xpath', '//*[@id="SubmitBtn"]').click()


def verification_code(driver):
    """Crop the captcha out of a page screenshot and return its OCR text.

    Locates the ``CaptchaImg`` element, saves a screenshot of the current
    page to ``a.png``, crops the element's rectangle out of it into
    ``aaaa.png`` and passes that file to ``discern``.

    Args:
        driver: Selenium WebDriver instance with the login page loaded.

    Returns:
        Whatever ``discern`` returns for the cropped captcha image.
    """
    # Look the element up once; find_element(by, value) replaces the
    # find_element_by_id helper that was removed in Selenium 4.3.
    captcha = driver.find_element('id', 'CaptchaImg')

    # Take the screenshot only after the element has been found, so the crop
    # source is captured once the captcha is known to be in the page.
    driver.save_screenshot('a.png')

    location = captcha.location
    size = captcha.size
    # The left edge is moved 5 px inwards, as in the original script;
    # presumably this trims a border -- TODO confirm against the target page.
    left = location['x'] + 5
    top = location['y']
    right = location['x'] + size['width']
    bottom = location['y'] + size['height']

    # The context manager closes the screenshot file once the crop is saved.
    with Image.open('a.png') as page:
        page.crop((left, top, right, bottom)).save('aaaa.png')

    # NOTE(review): pause kept from the original script; its purpose is not
    # evident from this file.
    time.sleep(1)
    return discern('aaaa.png')

def discern(name, threshold=130):
    """Run OCR on an image file and return the recognised text.

    The image is converted to RGBA, its outermost one-pixel frame is painted
    white, and every pixel is then binarised: a pixel becomes black when any
    of its R, G or B channels is below ``threshold`` and white otherwise.
    The cleaned image is handed to ``pytesseract.image_to_string``.

    Args:
        name: Path of the image file to read.
        threshold: Channel value below which a pixel counts as dark.
            Defaults to 130, the value used by the original script.

    Returns:
        The OCR text with leading and trailing whitespace removed.
    """
    # convert() returns an independent image, so the file can be closed
    # as soon as the conversion is done.
    with Image.open(name) as source:
        img = source.convert('RGBA')

    pix = img.load()
    width, height = img.size
    white = (255, 255, 255, 255)
    black = (0, 0, 0, 255)

    # Blank the top/bottom rows and the left/right columns (the dark frame).
    for x in range(width):
        pix[x, 0] = pix[x, height - 1] = white
    for y in range(height):
        pix[0, y] = pix[width - 1, y] = white

    # Binarise: any channel darker than the threshold makes the pixel black.
    for y in range(height):
        for x in range(width):
            red, green, blue = pix[x, y][:3]
            if red < threshold or green < threshold or blue < threshold:
                pix[x, y] = black
            else:
                pix[x, y] = white

    text = pytesseract.image_to_string(img)
    # OCR output commonly ends with newline/form-feed characters; strip them
    # so a trailing newline typed into a form field does not act as Enter.
    return text.strip()




if __name__ == '__main__':
    # Script entry point: start Chrome, run the login flow, then shut down.
    d = webdriver.Chrome()
    try:
        # Let element lookups wait up to 10 seconds for the page to render.
        d.implicitly_wait(10)
        url = 'http://www.51ym.me/User/Login.aspx'
        get_img_url(d, url)
        # Leave the browser open for a while so the result can be seen.
        time.sleep(10)
    finally:
        # quit() closes every window and ends the driver session, so a
        # separate close() is not needed; running it in finally ensures the
        # browser is shut down even when the login flow raises.
        d.quit()