import queue
import threading
import time

import requests
from bs4 import BeautifulSoup
from urllib import parse

exitFlag = 0  # flipped to 1 by the main thread to tell the workers to stop
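

# Worker thread: wraps process_data() so each thread drains the shared queue.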
class myThread(threading.Thread):
    def __init__(self, threadID, name, q):
        threading.Thread.__init__(self)
        self.threadID = threadID
        self.name = name
        self.q = q

    def run(self):
        print("Starting thread: " + self.name)
        process_data(self.name, self.q)
        print("Exiting thread: " + self.name)
def process_data(name, q):
    while not exitFlag:
        queueLock.acquire()
        if not q.empty():
            data = q.get()
            queueLock.release()
            header = {
                'User-Agent': '',
                'accept-language': '',
                'accept': '',
                'cookie': ''
            }
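            # The header values are intentionally left blank; populate them
            # with real browser values if the site rejects bare requests.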
            response = requests.get(data.replace("\n", ""), headers=header)
            if response.status_code == 200:
                try:
                    soup = BeautifulSoup(response.text, 'html.parser')
                    div = soup.find('div', class_='row u-pl-10 u-pr-10')
                    url = div.find("a", class_='download-link')
                    r = requests.get(url.attrs['href'], headers=header, stream=True)
                    path = parse.urlparse(r.url).path
                    fileName = path[path.rindex("/") + 1:]  # strip the leading slash
                    with open(r'D:\Creative Fabrica' + '\\' + fileName, 'wb') as f:
                        for chunk in r.iter_content(chunk_size=1024):  # write each chunk to disk as it downloads
                            if chunk:
                                f.write(chunk)
                    print(data + " download complete")
                    with open(r'C:\Users\user\Desktop\logs.txt', 'a') as log:
                        log.write(data)
                except Exception as e:
                    print(str(e))
                    with open(r'C:\Users\user\Desktop\failed.txt', 'a') as log:
                        log.write(data)
        else:
            queueLock.release()
            time.sleep(1)
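

# Main: read the link list, spin up the worker threads, fill the queue,
# then wait for the queue to drain before signalling the workers to exit.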
with open(r'C:\Users\user\Desktop\links.txt', 'r') as f:
    links = f.readlines()
links = list(reversed(links))
nameList = links

queueLock = threading.Lock()
workQueue = queue.Queue(len(nameList) + 1000)
threads = []
threadID = 1
# Create the worker threads
for i in range(1, 101):
    thread = myThread(threadID, str(i), workQueue)
    thread.start()
    threads.append(thread)
    threadID += 1
# Fill the queue
queueLock.acquire()
for word in nameList:
    workQueue.put(word)
queueLock.release()
# Wait for the queue to empty
while not workQueue.empty():
    time.sleep(0.1)  # sleep briefly instead of spinning at 100% CPU

# Notify the threads that it is time to exit
exitFlag = 1

# Wait for all threads to finish
for t in threads:
    t.join()
print("Exiting main thread")