Lucent's Blog

当时明月在 曾照彩云归

人生不相见,动如参与商。

6LCi5Y2O5qKFLOaIkeaDs+S9oOS6hg==


Python多线程下载

import queue
import threading
import time
from urllib import parse

import requests
from bs4 import BeautifulSoup

# Shared shutdown flag: the worker loop in process_data keeps running while
# this is falsy; the main script sets it to 1 once the work queue is empty.
exitFlag = 0

class myThread(threading.Thread):
    """Worker thread that hands its queue to ``process_data``.

    Args:
        threadID: Identifier stored on the instance as ``threadID``.
        name: Value assigned to the thread's ``name`` attribute; it also
            appears in the start/exit messages printed by ``run``.
        q: Queue object passed through to ``process_data``.
    """

    def __init__(self, threadID, name, q):
        super().__init__()
        self.q = q
        self.name = name
        self.threadID = threadID

    def run(self):
        """Print a start message, run ``process_data``, print an exit message."""
        label = self.name
        print(f"开启线程:{label}")
        process_data(label, self.q)
        print(f"退出线程:{label}")

def _append_line(log_path, url):
    """Append *url* to the text file at *log_path* as its own line."""
    with open(log_path, 'a') as log:
        log.write(url + "\n")


def _download(url, header, timeout):
    """Download the file linked from the page at *url*.

    Fetches the page, looks up the ``a.download-link`` anchor inside the
    ``div`` with class ``row u-pl-10 u-pr-10``, and streams the anchor's
    target to disk.  The file is named after the last path segment of the
    final (post-redirect) download URL.

    Args:
        url: Page URL, already stripped of surrounding whitespace.
        header: Header dict sent with both HTTP requests.
        timeout: Value forwarded to ``requests.get`` as ``timeout``.

    Raises:
        Exception: Any failure (network error, unexpected status, missing
            link, unusable file name, disk error) propagates to the caller.
    """
    response = requests.get(url, headers=header, timeout=timeout)
    if response.status_code != 200:
        # Previously such pages were dropped without any trace.
        raise RuntimeError(
            "unexpected status %s for %s" % (response.status_code, url))

    soup = BeautifulSoup(response.text, 'html.parser')
    div = soup.find('div', class_='row u-pl-10 u-pr-10')
    link = div.find("a", class_='download-link') if div is not None else None
    if link is None:
        raise LookupError("download link not found on " + url)

    # ``with`` closes the streamed response so its connection is released
    # even when writing the file fails part-way.
    with requests.get(link.attrs['href'], headers=header, stream=True,
                      timeout=timeout) as r:
        # Do not save an HTTP error page as if it were the download.
        r.raise_for_status()
        path = parse.urlparse(r.url).path
        # Last path segment without the leading "/"; rindex raises
        # ValueError when the path contains no "/" at all.
        file_name = path[path.rindex("/") + 1:]
        if not file_name:
            raise ValueError("no file name in download URL " + r.url)
        with open('D:\\Creative Fabrica\\' + file_name, 'wb') as f:
            # Write to disk chunk by chunk while downloading.
            for chunk in r.iter_content(chunk_size=1024):
                if chunk:
                    f.write(chunk)


def process_data(name, q, timeout=30):
    """Worker loop: take URLs from *q* and download them until told to stop.

    Loops while the module-level ``exitFlag`` is falsy.  Each iteration
    takes at most one entry from *q* (guarded by the module-level
    ``queueLock``), downloads it, and then sleeps for one second.
    Successful URLs are appended to ``logs.txt`` and failed ones to
    ``failed.txt``, one URL per line; blank entries are skipped.

    Args:
        name: Worker name supplied by the caller; not used by the loop.
        q: Queue whose entries are page URLs (surrounding whitespace,
            including the trailing newline, is ignored).
        timeout: Seconds passed to ``requests.get`` as ``timeout`` so a
            stalled server cannot block the worker forever.
    """
    header = {
        'User-Agent': '',
        'accept-language': '',
        'accept': '',
        'cookie': ''
    }
    while not exitFlag:
        # The emptiness check and the get happen under one lock hold, and
        # ``with`` guarantees the lock is released if either call raises.
        with queueLock:
            data = None if q.empty() else q.get()
        if data is not None:
            url = data.strip()
            if url:
                try:
                    _download(url, header, timeout)
                    print(url + "下载完成")
                    _append_line(r'C:\Users\user\Desktop\logs.txt', url)
                except Exception as e:
                    # Best effort: report, record the URL for a later
                    # retry, and keep the worker alive.
                    print(str(e))
                    _append_line(r'C:\Users\user\Desktop\failed.txt', url)
        time.sleep(1)


# Read the page URLs (one per line) and process them in reverse file order.
with open(r'C:\Users\user\Desktop\links.txt', 'r') as f:
    links = f.readlines()
links = list(reversed(links))

nameList = links
queueLock = threading.Lock()
# Capacity is larger than the number of links, so the puts below never block.
workQueue = queue.Queue(len(nameList)+1000)
threads = []
threadID = 1

# Start the worker threads; they poll the queue until it is filled.
for i in range(1, 101):
    thread = myThread(threadID, i, workQueue)
    thread.start()
    threads.append(thread)
    threadID += 1

# Fill the queue while holding the lock the workers use to read from it.
with queueLock:
    for word in nameList:
        workQueue.put(word)

# Wait for the queue to drain.  Sleep between checks instead of spinning,
# so the main thread does not burn a CPU core while the workers download.
while not workQueue.empty():
    time.sleep(0.1)

# Tell the workers to exit once they finish their current item.
exitFlag = 1

# Wait for every worker to finish.
for t in threads:
    t.join()
print("退出主线程")
上一篇

解压一个.zip文件或一个目录下的所有.zip文件到指定目录。运行方法:格式:python unzip.py "source_dir" "dest_dir" password 参数说明:source_dir和dest_dir既可以是绝对路径,也可以是相对路径。用"…

阅读
下一篇

神童诗【作者】汪洙(宋)天子重英豪,文章教尔曹;万般皆下品,惟有读书高。少小须勤学,文章可立身;满朝朱紫贵,尽是读书人。学问勤中得,萤窗万卷书;三冬今足用,谁笑腹空虚。自小多才学,平生志气高;别人怀宝剑,我有笔如刀。朝为田舍郎,暮登天子堂;将相本无种,男儿当自强。学乃身之宝,儒为席上珍;君看为宰相,…

阅读