# txt_different_find.py
"""Write the lines found in one text file but not in another.

Each non-blank line of the two input files is treated as one URL. Every URL
that appears in ``input_file2`` but not in ``input_file1`` is written to
``output_file``, one per line, in sorted order.
"""

# Input files; each line is treated as one URL.
input_file1 = 'output10.txt'
input_file2 = 'extracted_urls.txt'
# Output file that receives the URLs found only in input_file2.
output_file = 'different_urls.txt'


def _read_urls(path):
    """Return the set of whitespace-stripped, non-empty lines in *path*."""
    # No explicit encoding is passed, so the platform default applies,
    # exactly as in the original script.
    with open(path, 'r') as handle:
        stripped_lines = (line.strip() for line in handle)
        # Skip blank lines so an empty string is never treated as a URL.
        return {url for url in stripped_lines if url}


# Sets of URLs read from each input file.
urls_set1 = _read_urls(input_file1)
urls_set2 = _read_urls(input_file2)

# URLs that occur only in input_file2 (extracted_urls.txt).
different_urls = urls_set2.difference(urls_set1)

# Set iteration order is arbitrary, so sort to make the output file
# reproducible between runs. Each URL goes on its own line.
with open(output_file, 'w') as output:
    output.writelines(url + '\n' for url in sorted(different_urls))

print("提取出的不同的URL已保存到输出文件中。")