#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import os
import time
from urllib.parse import urlparse

import requests

# Path of the txt file to read (one image URL per line)
txt_file = 'unique_urls1.txt'
# Base directory where downloaded images are saved
base_image_dir = 'completeimage2'
# Create the base image directory
os.makedirs(base_image_dir, exist_ok=True)
# Log files recording successful and failed downloads
complete_file = 'downcomplete2.txt'
error_file = 'downerror2.txt'

# Read all lines from the txt file
with open(txt_file, 'r') as f:
    lines = f.readlines()

# Walk through each image URL and download it
for i, line in enumerate(lines, 1):
    url = line.strip()
    # Parse the URL
    parsed_url = urlparse(url)
    try:
        # Download the image
        response = requests.get(url, timeout=5)
        if response.status_code == 200:
            # Decide whether subdirectories need to be created
            if parsed_url.path.split('/')[-1] != "":
                # Build a folder path that mirrors the URL structure
                folder_path = os.path.join(
                    base_image_dir, parsed_url.netloc,
                    *parsed_url.path.strip("/").split("/")[:-1])
                # Create the folder
                os.makedirs(folder_path, exist_ok=True)
                # Extract the file name
                filename = url.split('/')[-1]
                # Save the image
                with open(os.path.join(folder_path, filename), 'wb') as f:
                    f.write(response.content)
            else:
                # URL path ends with '/': save under the base directory,
                # falling back to a generated name so the write does not
                # fail on an empty file name (added safeguard)
                filename = url.split('/')[-1] or f'image_{i}'
                with open(os.path.join(base_image_dir, filename), 'wb') as f:
                    f.write(response.content)
            print(f"Image {i}: {url} downloaded.")
            # Record the successful URL
            with open(complete_file, 'a') as f:
                f.write(url + '\n')
        else:
            print(f"Image {i}: {url} download failed.")
            # Record the failed URL
            with open(error_file, 'a') as f:
                f.write(url + '\n')
    except requests.exceptions.Timeout:
        print(f"Image {i}: {url} download timed out.")
        # Record the failed URL
        with open(error_file, 'a') as f:
            f.write(url + '\n')
    except requests.exceptions.RequestException as e:
        print(f"Image {i}: {url} download error: {e}")
        # Record the failed URL
        with open(error_file, 'a') as f:
            f.write(url + '\n')
    # Pause between requests
    time.sleep(1)
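For long URL lists, one possible refinement is to reuse a single requests.Session instead of opening a fresh connection per URL, and to let urllib3 retry transient failures before the script logs an error. This is a sketch, not part of the original script; the make_session helper and the retry parameters are assumptions:

import requests
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry

# Hypothetical helper: a Session keeps TCP connections alive across
# requests to the same host, and the Retry policy re-attempts requests
# that time out or return a transient HTTP error.
def make_session(retries=3, backoff=0.5):
    session = requests.Session()
    retry = Retry(total=retries, backoff_factor=backoff,
                  status_forcelist=[429, 500, 502, 503, 504])
    adapter = HTTPAdapter(max_retries=retry)
    session.mount('http://', adapter)
    session.mount('https://', adapter)
    return session

# Usage: create one session before the loop, then replace
# requests.get(url, timeout=5) with session.get(url, timeout=5).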