首页
关于
Search
1
wps脚本
163 阅读
2
WPS常用代码
124 阅读
3
解决*unicode*编码中文乱码问题
52 阅读
4
ddddocr库使用
49 阅读
5
re模块详解
48 阅读
默认分类
登录
Search
Typecho
累计撰写
21
篇文章
累计收到
65
条评论
首页
栏目
默认分类
页面
关于
搜索到
21
篇与
的结果
2024-10-06
re模块详解
re模块详解 findall 查找所有. 返回list lst = re.findall("m", "mai le fo len, mai ni mei!") print(lst) # ['m', 'm', 'm'] lst = re.findall(r"\d+", "5点之前. 你要给我5000万") print(lst) # ['5', '5000'] search 会进行匹配. 但是如果匹配到了第一个结果. 就会返回这个结果. 如果匹配不上search返回的则是None ret = re.search(r'\d', '5点之前. 你要给我5000万').group() print(ret) # 5 match 只能从字符串的开头进行匹配 ret = re.match('a', 'abc').group() print(ret) # a finditer, 和findall差不多. 只不过这时返回的是迭代器(重点) it = re.finditer("m", "mai le fo len, mai ni mei!") for el in it: print(el.group()) # 依然需要分组 compile() 可以将一个长长的正则进行预加载. 方便后面的使用 obj = re.compile(r'\d{3}') # 将正则表达式编译成为一个正则表达式对象, 规则要匹配的是3个数字 ret = obj.search('abc123eeee') # 正则表达式对象调用search, 参数为待匹配的字符串 print(ret.group()) # 结果: 123 正则中的内容如何单独提取?单独获取到正则中的具体内容可以给分组起名字 s = """ <div class='西游记'><span id='10010'>中国联通</span></div> """ obj = re.compile(r"<span id='(?P<id>\d+)'>(?P<name>\w+)</span>", re.S) result = obj.search(s) print(result.group()) # 结果: <span id='10010'>中国联通</span> print(result.group("id")) # 结果: 10010 # 获取id组的内容 print(result.group("name")) # 结果: 中国联通 #获取name组的内容 sub 去除html标签 pattern = re.compile(r'<[^>]+>',re.S) result = pattern.sub('', html)
2024年10月06日
48 阅读
11 评论
0 点赞
2024-10-06
PyExecJS使用(execjs)
PyExecJS使用(execjs):通过Python代码去执行JavaScript代码的库,依赖于JavaScript运行环境,最好可以装个Node.js。🌏安装方式:pip install PyExecJS 🌏用法:import execjs #可以安装node.js,然后用它运行,也可以用windows默认的js执行环境 #获取js执行环境,环境名可以指定 # node = execjs.get() #打印windows默认的js执行环境 print(execjs.get().name) #编译js代码 ctx = execjs.compile(open('./log.js','r',encoding='utf-8').read()) #执行函数法一: "fn('我是阿牛!')" sign = ctx.eval("fn('我是阿牛!')") #eval方法中,整个函数调用包含在字符串中 print(sign) #执行函数法二: sign = ctx.call('fn','我是阿牛!') #call方法中,第一个参数是函数名(写成字符串),后面接参数 print(sign) #执行js代码 i = execjs.eval('""+ new Date().getTime()') print(i) print(i+str(execjs.eval('parseInt(10 * Math.random())')))
2024年10月06日
37 阅读
0 评论
0 点赞
2024-10-06
验证码随机生成
# Random CAPTCHA image generation.
from random import choice, randint, randrange
import string

from PIL import Image, ImageDraw, ImageFont

# Character set that random CAPTCHA text is drawn from.
characters = string.ascii_letters + string.digits


def selectedCharacters(length):
    """Return a random string of ``length`` characters taken from ``characters``."""
    return ''.join(choice(characters) for _ in range(length))


def getColor():
    """Return a random ``(r, g, b)`` tuple with every channel in 0..255."""
    return (randint(0, 255), randint(0, 255), randint(0, 255))


def _measureText(draw, text, font):
    """Return ``(width, height)`` of ``text`` rendered with ``font``.

    ``ImageDraw.textsize`` was removed in Pillow 10, so ``textbbox`` is used
    when it exists and ``textsize`` only as a fallback for old releases.
    """
    if hasattr(draw, 'textbbox'):
        left, top, right, bottom = draw.textbbox((0, 0), text, font=font)
        return right - left, bottom - top
    return draw.textsize(text, font)


def main(size=(200, 100), characterNumber=6, bgcolor=(255, 255, 255),
         text='1+?= 6', fontPath='C:/Windows/Fonts/Dengl.ttf'):
    """Render a distorted, noisy CAPTCHA image, save it and display it.

    The image is written to ``result.jpg`` in the working directory and then
    opened with the default viewer.

    :param size: ``(width, height)`` of the image in pixels.
    :param characterNumber: number of leading characters of ``text`` to draw;
        also used to space the characters horizontally.
    :param bgcolor: background colour as an ``(r, g, b)`` tuple.
    :param text: text to draw. Pass ``selectedCharacters(characterNumber)``
        for a random CAPTCHA.
    :param fontPath: path of the TrueType font file to load.
    :return: ``None``. When the text does not fit into ``size`` (or
        ``characterNumber`` is not positive) a message is printed and no
        image is produced.
    """
    # A non-positive count would divide by zero in the spacing computation.
    if characterNumber < 1:
        print('尺寸不合法')
        return

    # Blank canvas the characters are first drawn on.
    imageTemp = Image.new('RGB', size, bgcolor)
    draw01 = ImageDraw.Draw(imageTemp)

    # Measure the text and reject sizes it cannot fit into.
    font = ImageFont.truetype(fontPath, 48)
    width, height = _measureText(draw01, text, font)
    if width + 2 * characterNumber > size[0] or height > size[1]:
        print('尺寸不合法')
        return

    # Draw each character in its own random colour with a vertical jitter.
    # Slicing (instead of indexing) avoids an IndexError when
    # characterNumber is larger than len(text).
    startX = 0
    widthEachCharater = width // characterNumber
    for character in text[:characterNumber]:
        startX += widthEachCharater + 1
        position = (startX, (size[1] - height) // 2 + randint(-10, 10))
        draw01.text(xy=position, text=character, font=font, fill=getColor())

    # Copy every row with a random horizontal shift of -1 or 0 pixels to
    # get a slightly warped look; target columns are clamped to the image.
    imageFinal = Image.new('RGB', size, bgcolor)
    pixelsFinal = imageFinal.load()
    pixelsTemp = imageTemp.load()
    for y in range(size[1]):
        offset = randint(-1, 0)
        for x in range(size[0]):
            newx = min(max(x + offset, 0), size[0] - 1)
            pixelsFinal[newx, y] = pixelsTemp[x, y]

    # Scatter noise pixels with random colour and position (7% of the area).
    draw02 = ImageDraw.Draw(imageFinal)
    for _ in range(int(size[0] * size[1] * 0.07)):
        draw02.point((randrange(0, size[0]), randrange(0, size[1])),
                     fill=getColor())

    # Eight random interference lines from the left edge to the right edge.
    for _ in range(8):
        start = (0, randrange(size[1]))
        end = (size[0], randrange(size[1]))
        draw02.line(start + end, fill=getColor(), width=1)

    # Eight random ellipse outlines (arcs spanning the full 0..360 degrees).
    for _ in range(8):
        start = (-50, -50)
        end = (size[0] + 10, randint(0, size[1] + 10))
        draw02.arc(start + end, 0, 360, fill=getColor())

    # Save and display the result.
    imageFinal.save("result.jpg")
    imageFinal.show()


if __name__ == '__main__':
    main((200, 100), 6, (255, 255, 255))
2024年10月06日
32 阅读
0 评论
0 点赞
2024-10-06
base64加解密
# Base64 encoding and decoding.
import base64


class Base64Data:
    """Convert text to and from its Base64 representation.

    Note that Base64 is a reversible encoding, not encryption; the method
    names are kept as they are for compatibility with existing callers.
    """

    def encrpt_base64(self, original_string: str) -> str:
        """Return the Base64 text that encodes ``original_string``.

        :param original_string: text to encode; it is serialised as UTF-8.
        :return: the Base64 representation as a ``str``.
        """
        # b64encode works on bytes, so the text is converted first.
        byte_data = original_string.encode('utf-8')
        encoded_data = base64.b64encode(byte_data)
        # The encoder returns bytes; hand back text for printing or storage.
        return encoded_data.decode('utf-8')

    def decrpt_base64(self, encoded_string: str) -> str:
        """Return the text that ``encoded_string`` is the Base64 form of.

        :param encoded_string: Base64 text to decode.
        :return: the decoded payload interpreted as UTF-8 text.
        :raises binascii.Error: if ``encoded_string`` is incorrectly padded.
        :raises UnicodeDecodeError: if the decoded bytes are not valid UTF-8.
        """
        # b64decode is fed bytes here, mirroring encrpt_base64.
        byte_encoded_data = encoded_string.encode('utf-8')
        decoded_data = base64.b64decode(byte_encoded_data)
        # The decoder yields the raw bytes; convert them back to text.
        return decoded_data.decode('utf-8')
2024年10月06日
24 阅读
0 评论
0 点赞
2024-10-06
selenium解决阿里滑块
# Solving the Alibaba slider CAPTCHA with selenium.
#
# Preparation: patch the chromedriver binary, replacing `$cdc_` with
# `$chr_fajfjafjasifjsiaSsdjdl_`.
#
# Code:
import logging
import random
import time

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver import ActionChains
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import (
    TimeoutException,
    MoveTargetOutOfBoundsException,
    StaleElementReferenceException,
    ElementClickInterceptedException,
    ElementNotInteractableException,
)

# The script logs through `logger` but never defined it; define it here.
logger = logging.getLogger(__name__)


def create_webdriver(url, user_agent):
    """Start Chrome with automation markers reduced and open ``url``.

    :param url: page to load once the browser is up.
    :param user_agent: value passed to Chrome as its ``user-agent`` switch.
    :return: ``(browser, wait)`` - the Chrome driver and a
        ``WebDriverWait`` bound to it with a 3 second timeout.
    """
    headers = "user-agent=" + user_agent
    options = Options()
    # Works around the "DevToolsActivePort file doesn't exist" error.
    options.add_argument('--no-sandbox')
    # Browser resolution.
    options.add_argument('window-size=1920x3000')
    options.add_argument('--start-maximized')
    options.add_argument('log-level=3')
    # Google's documentation mentions this switch as a bug workaround.
    options.add_argument('--disable-gpu')
    # Hide scrollbars, for some special pages.
    options.add_argument('--hide-scrollbars')
    # Do not load images, to speed things up.
    options.add_argument('blink-settings=imagesEnabled=false')
    options.add_experimental_option('excludeSwitches', ['enable-automation'])
    options.add_argument("--disable-blink-features=AutomationControlled")
    # NOTE: '--headless' is intentionally not set. It runs the browser
    # without a visible UI; on Linux without a display, start-up fails
    # unless it is added.
    options.add_argument(headers)
    options.page_load_strategy = 'eager'
    browser = webdriver.Chrome(options=options)
    # Make navigator.webdriver read as undefined in every new document.
    browser.execute_cdp_cmd("Page.addScriptToEvaluateOnNewDocument", {
        "source": """Object.defineProperty(navigator, 'webdriver', {get: () => undefined})""",
    })
    browser.get(url)
    wait = WebDriverWait(browser, 3)
    return browser, wait


def get_track(distance):
    """Build a list of per-step moves that covers an offset.

    The motion accelerates until 4/5 of ``distance`` is covered and then
    decelerates; the target is deliberately overshot by 15.

    :param distance: offset of the gap to reach.
    :return: list of rounded step lengths (the movement track).
    """
    # Movement track.
    track = []
    # Distance covered so far.
    current = 0
    # Threshold after which the motion decelerates.
    mid = distance * 4 / 5
    # Time interval of one step.
    t = 0.2
    # Initial velocity.
    v = 0
    distance += 15
    while current < distance:
        if current < mid:
            # Positive acceleration of 3.
            a = 3
        else:
            # Negative acceleration of -2.
            a = -2
        # Velocity at the start of this step.
        v0 = v
        # Current velocity: v = v0 + a * t
        v = v0 + a * t
        # Distance of this step: x = v0 * t + 1/2 * a * t^2
        move = v0 * t + 1 / 2 * a * t * t
        # Accumulate the covered distance.
        current += move
        # Record the step.
        track.append(round(move))
    return track


def sliding(browser, wait):
    """Drag the slider of the CAPTCHA if one is shown.

    :param browser: the selenium driver to act on.
    :param wait: ``WebDriverWait`` used to locate the elements.
    :return: ``True`` when no CAPTCHA is present (container width is
        ``auto`` or an element lookup timed out); ``None`` after a drag
        attempt or when the drag failed with one of the tolerated errors.
    """
    try:
        trabecula = wait.until(
            EC.presence_of_element_located((By.CSS_SELECTOR, '#nocaptcha')))
        str_width = trabecula.value_of_css_property("width").replace("px", "")
        if str_width == "auto":
            print("验证码没有出现1")
            return True
        trabecula_width = float(str_width)
        print(trabecula_width)
        # The slider handle is looked up by a different selector depending
        # on the container width.
        if trabecula_width <= 350:
            slider = wait.until(
                EC.presence_of_element_located((By.CSS_SELECTOR, '#nc_1_n1z')))
        else:
            slider = wait.until(
                EC.presence_of_element_located((By.CSS_SELECTOR, '.button')))
        action = ActionChains(browser)
        action.click_and_hold(slider).perform()
        # One long move, a small overshoot, then a step back. The steps of
        # get_track(random.randint(280, 295)) could be replayed here instead.
        action.move_by_offset(xoffset=random.randint(280, 290), yoffset=0).perform()
        action.move_by_offset(xoffset=random.randint(2, 5), yoffset=0).perform()
        action.move_by_offset(xoffset=-random.randint(3, 5), yoffset=0).perform()
        tracks = [-1, -1, -1, -2, -2, -3, -2, -2, -1]
        for x in tracks:
            action.move_by_offset(xoffset=x, yoffset=0).perform()
        action.release().perform()
    except TimeoutException:
        print("验证码没有出现2")
        return True
    except (MoveTargetOutOfBoundsException, StaleElementReferenceException,
            ElementNotInteractableException):
        # Best effort: a failed drag is ignored and the caller retries.
        pass
    return None


def get_webdriver_cookies(browser, wait):
    """Try up to three times to pass the checks, then read the cookies.

    :param browser: the selenium driver to act on.
    :param wait: ``WebDriverWait`` used to locate the elements.
    :return: dict holding ``acw_sc__v2`` and/or ``acw_sc__v3`` when the
        browser has those cookies; empty otherwise.
    """
    cookies = {}
    for _ in range(3):
        # Close the dialog if one is shown, then start the next round.
        try:
            time.sleep(random.randint(2, 3))
            wait.until(EC.presence_of_element_located(
                (By.CSS_SELECTOR, '.baxia-dialog-close'))).click()
            continue
        except (TimeoutException, ElementClickInterceptedException,
                StaleElementReferenceException, ElementNotInteractableException):
            pass
        # sliding() returns True when there is no CAPTCHA to solve.
        if sliding(browser, wait):
            break
        # If the error element is shown, click it and reload.
        try:
            wait.until(EC.presence_of_element_located(
                (By.CSS_SELECTOR, '.errloading'))).click()
            browser.execute_script("location.reload()")
            continue
        except (TimeoutException, ElementClickInterceptedException,
                StaleElementReferenceException, ElementNotInteractableException):
            pass
        try:
            wait.until(EC.presence_of_element_located(
                (By.CSS_SELECTOR, '.SideBar-menu-item:nth-child(3)'))).click()
            cookie_v2 = browser.get_cookie("acw_sc__v2")
            browser.execute_script("location.reload()")
            # NOTE(review): the original text lost its indentation; the
            # break is assumed to belong to this `if` - confirm.
            if cookie_v2:
                cookies['acw_sc__v2'] = cookie_v2['value']
                logger.info(cookies)
                break
        except (TimeoutException, ElementClickInterceptedException,
                StaleElementReferenceException):
            browser.execute_script("location.reload()")
            continue
    # NOTE(review): assumed to run once after the loop - confirm.
    time.sleep(1)
    cookie_v2 = browser.get_cookie("acw_sc__v2")
    browser.execute_script("location.reload()")
    if cookie_v2:
        cookies['acw_sc__v2'] = cookie_v2['value']
    cookie_v3 = browser.get_cookie("acw_sc__v3")
    if cookie_v3:
        cookies['acw_sc__v3'] = cookie_v3['value']
    print(cookies)
    return cookies


def get_cookie():
    """Make up to five attempts to obtain the anti-bot cookies.

    A fresh browser is started for every attempt and always shut down
    afterwards; the original reused a browser it had already closed.

    :return: ``True`` as soon as an attempt yields cookies, ``False`` when
        all five attempts came back empty.
    """
    for _ in range(5):
        browser, wait = create_webdriver(
            'https://tang.cdt-ec.com/notice/moreController/toMore?globleType=0',
            'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/112.0.0.0 Safari/537.36')
        try:
            cookies = get_webdriver_cookies(browser, wait)
        finally:
            # quit() ends the whole driver session, windows included.
            browser.quit()
        if cookies:
            return True
        sleep_time = random.randint(10, 30)
        print("等待验证~~~~~~~~~~" + time.strftime("%Y-%m-%d %H:%M:%S") + " " + str(sleep_time / 60))
        time.sleep(sleep_time)
    return False


# Script entry: run once and print whether cookies were obtained.
print(get_cookie())
2024年10月06日
27 阅读
0 评论
0 点赞
1
2
3
4
5