图里吧美女图片 异步协程版Python 源码

图里吧美女图片 异步协程版Python 源码

分类:电脑网络 | 作者:小七 | 发布时间:2021-06-24 10:21:06 | 字号:A+ A-

image.png

import requests,os,re,time
from lxml import  etree
import asyncio
import aiohttp
import aiofile
 
# Example of a timthumb-proxied image URL (the thumbnail script is given the
# real upload path via ``src`` plus size/quality parameters).
# NOTE(review): not referenced anywhere else in the visible code.
yin_url = (
    "http://www.tulishe.com/wp-content/themes/modown/timthumb.php"
    "?src=http://www.tulishe.com/wp-content/uploads/2021/06/20075128370.jpg"
    "&w=2000&h=2000&zc=2&q=1000"
)

# Request headers shared by the page fetches: a desktop Chrome User-Agent and
# a Referer pointing at the site itself.
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/91.0.4472.106 Safari/537.36"
    ),
    "Referer": "http://www.tulishe.com/",
}
def resp(url, timeout=30):
    """Fetch *url* synchronously and return the ``requests`` response.

    The request is sent with the module-level ``headers``.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` waits indefinitely, so one stalled
            connection would hang the whole crawl.

    Returns:
        The ``requests.Response`` with ``encoding`` set to the encoding
        detected from the body.

    Raises:
        requests.exceptions.RequestException: On connection errors or when
            the timeout expires.
    """
    response = requests.get(url=url, headers=headers, timeout=timeout)
    # Decode ``response.text`` with the encoding guessed from the content
    # instead of whatever the HTTP headers declared.
    response.encoding = response.apparent_encoding
    return response
 
 
def main_img(res):
    """Extract the album detail-page links from a listing page.

    Args:
        res: Response-like object whose ``text`` attribute holds the HTML.

    Returns:
        A list with the ``href`` value of every
        ``<a itemprop="url" rel="bookmark" ...>`` anchor, in document order.
    """
    # The pattern has a single group, so findall() yields the bare href strings.
    bookmark_link = re.compile(
        r'<a itemprop="url" rel="bookmark" href="(?P<urls>.*?)" title='
    )
    return bookmark_link.findall(res.text)
 
 
def tree(img_url):
    """Collect the image URLs of one album detail page.

    Side effect: stores the album title (from ``<h1 class="article-title">``)
    in the module-level ``img_namedir1``, which ``down`` uses as the target
    directory name.

    Args:
        img_url: URL of the album detail page.

    Returns:
        A list of image URLs: first the thumbnails of every gallery cell after
        the fourth, rewritten to request a 2000x2000 rendering, followed by
        the link targets of the first four cells. Cells lacking the expected
        ``<img>`` / ``<a>`` element are skipped.
    """
    global img_namedir1

    response = resp(img_url)
    title_parts = re.findall(
        r'<h1 class="article-title">(?P<name>.*?)</h1>', response.text
    )
    title = "".join(part.strip() for part in title_parts)
    # The title becomes a directory name: replace characters that are path
    # separators or illegal in Windows file names so makedirs cannot fail or
    # silently create nested directories.
    img_namedir1 = re.sub(r'[\\/:*?"<>|]', "_", title)

    img_tree = etree.HTML(response.text)
    if img_tree is None:
        # Nothing could be parsed (e.g. empty body): no images to report.
        return []

    gallery_cells = img_tree.xpath('//div[@id="gallery-2"]/div')
    img_urls = []

    print("正在处理隐藏照片---------------")
    # Cells after the fourth only expose a thumbnail; keep the part of its
    # URL before the first "&" and ask for a large rendering instead.
    for cell in gallery_cells[4:]:
        thumb_srcs = cell.xpath('./img/@src')
        if not thumb_srcs:
            continue
        img_urls.append(
            thumb_srcs[0].split("&")[0] + '&w=2000&h=2000&zc=3&q=1000'
        )

    # The first four cells link to their image directly.
    for cell in gallery_cells[:4]:
        hrefs = cell.xpath('./a/@href')
        if not hrefs:
            continue
        img_urls.append(hrefs[0])

    return img_urls
 
 
async def down(ur):
    """Download one image and save it under ``img_tuli/<album title>/``.

    The album directory name is read from the module-level ``img_namedir1``,
    so ``tree`` must have run for the album before this coroutine is
    scheduled. The request carries the module-level ``headers``, matching what
    ``resp`` sends for the HTML pages.

    Args:
        ur: URL of the image to download.

    Returns:
        None. Responses with a status other than 200 are reported and not
        written to disk.
    """
    async with aiohttp.ClientSession() as session:
        async with session.get(ur, headers=headers) as img_res:
            if img_res.status != 200:
                # Do not store an error page under an image file name.
                print(ur, "下载失败,状态码", img_res.status)
                return

            if len(ur) > 65:
                # Long URLs are treated as thumbnail-script URLs: the file
                # name is the last path segment with the "&..." query
                # parameters cut off.
                img_name = ur.split("/")[-1].split("&")[0]
                print("正在处理隐藏照片---------------名字")
                print(img_name)
            else:
                img_name = ur.split("/")[-1]
                print("---预览照片名字---")

            # Read the body before touching the file system so a failed
            # transfer does not leave an empty file behind.
            conment = await img_res.read()

    target_dir = "img_tuli/" + img_namedir1
    os.makedirs(target_dir, exist_ok=True)
    path = target_dir + '/' + img_name
    async with aiofile.async_open(path, 'wb') as f:
        await f.write(conment)
    print(path, "下载完成")
 
def main(start_page=1, end_page=1):
    """Crawl listing pages and download every album found on them.

    For each listing page the album links are extracted with ``main_img``,
    each album's image URLs with ``tree``, and the images of one album are
    downloaded concurrently with ``down`` on the module-level event loop
    ``loop`` (which must exist before this function is called).

    Args:
        start_page: First listing page to crawl (1-based).
        end_page: Last listing page to crawl, inclusive. The defaults crawl
            page 1 only; the site has 870+ pages in total.
    """
    for i in range(start_page, end_page + 1):
        url = f"http://www.tulishe.com/page/{i}"
        response = resp(url)
        for detail_url in main_img(response):
            img_urls = tree(detail_url)
            if not img_urls:
                # asyncio.wait() raises ValueError on an empty task set.
                continue
            # Bind the tasks to our loop explicitly instead of relying on
            # the implicit "current" loop.
            tasks = [loop.create_task(down(ur)) for ur in img_urls]
            loop.run_until_complete(asyncio.wait(tasks))
            # Retrieve task results so a failed download is reported here
            # rather than as "Task exception was never retrieved".
            for ur, task in zip(img_urls, tasks):
                if task.cancelled():
                    continue
                error = task.exception()
                if error is not None:
                    print(ur, "下载出错", repr(error))
 
 
 
if __name__ == '__main__':
    start = time.time()
    # Create the event loop explicitly: calling asyncio.get_event_loop()
    # with no running loop is deprecated. ``main`` reads this module-level
    # ``loop``, so the name must stay as is.
    loop = asyncio.new_event_loop()
    asyncio.set_event_loop(loop)
    try:
        main()
    finally:
        # Release the loop's resources even when the crawl fails.
        loop.close()
    print('一共耗时', time.time() - start)


点击这里把资源分享给朋友。

以上内容由小七资源网整理呈现,请务必在转载分享时注明本文地址!如对内容有疑问,请联系我们,谢谢!

小七资源网 © All Rights Reserved.  Copyright 小七资源网
本站部分内容收集于互联网,如果有侵权内容、不妥之处,请联系我们删除。敬请谅解!E-mail:[email protected]