在Linux下,资源下载的速度长期受限,ed2k、torrent什么的都没有速度,坑爹呀。自从购买了迅雷VIP的服务,就可以直接以http形式从迅雷服务器下载自己托迅雷下载的东西,而且如果这个资源别人已经下载过,你就不用再次下载了,迅雷会马上提示这个任务已经完成。至于其他的,用过的人都知道,也不再细说。如果是Windows平台配合迅雷客户端使用迅雷VIP,这个脚本也就没有啥意义了(因为客户端更人性化^_^,当然占用资源也不少,嘿嘿),所以前提是你的OS是Linux,并且使用迅雷离线的web界面。用Firefox下载迅雷离线的东西存在这样几个问题:文件名中文乱码,要自己改(暗骂编码ing);不支持断点续传(我挂过几次,只能无奈重新下载T_T);点击下载的时候迅雷响应慢死了,好久才跳出窗口。出于这几个原因,我就去研究了下PT酱的那个离线下载脚本,然后根据自己的需要重新写了一个,如下:
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#Time-stamp: <2011-10-25 21:36:28 Tuesday by roowe>
#File Name: thuner_xl_with_wget.py
#Author: bestluoliwe@gmail.com
#My Blog: www.iroowe.com
import re
import time
import os
import logging
import sys
from htmlentitydefs import entitydefs
import subprocess
# Path of the log file handed to log_init() by the __main__ block.
LOG_FILE = "/tmp/thuner_with_wget.log"
# Module-wide logger used by the functions below; it stays None until the
# __main__ block assigns the logger returned by log_init().
log = None
def log_init(log_file, quiet=False):
    """Configure the root logger and return it.

    Args:
        log_file: Path of the file that receives every log record.
        quiet: When false (the default) records are additionally emitted
            through a ``logging.StreamHandler``; when true only the file
            handler is installed.

    Returns:
        The root logger, with its level set to ``logging.DEBUG``.
    """
    logger = logging.getLogger()
    logger.setLevel(logging.DEBUG)
    formatter = logging.Formatter("%(asctime)s %(levelname)s %(message)s")

    # The file handler is always installed; the stream handler is optional.
    handlers = [logging.FileHandler(log_file)]
    if not quiet:
        handlers.append(logging.StreamHandler())

    for handler in handlers:
        handler.setFormatter(formatter)
        logger.addHandler(handler)
    return logger
def handle_entitydef(matchobj):
    """Return the replacement text for one ``&name;`` regex match.

    Meant to be used as the callback of ``re.sub("&(.*?);", ...)``.

    Args:
        matchobj: Match object whose group 1 is the entity name (the text
            between ``&`` and ``;``).

    Returns:
        The value mapped to that name in ``entitydefs``, or the whole
        matched text unchanged when the name is not a known entity.
    """
    key = matchobj.group(1)
    # dict.has_key() is deprecated (and gone in Python 3); dict.get() gives
    # the same "known entity or leave untouched" behaviour everywhere.
    return entitydefs.get(key, matchobj.group(0))
def collect_urls(html, only_bturls=False):
    """Extract ``(name, url)`` download pairs from a saved task page.

    Two kinds of entries are recognised:

    * ``<a ... name="bturls" title="NAME" ... href="http...">`` anchors;
    * unless ``only_bturls`` is true, ``<input id="durlID" ... title="NAME">``
      elements paired with the ``<input id="dl_urlID" ... value="http...">``
      element carrying the same ID.

    HTML entities in both the name and the URL are decoded through
    ``handle_entitydef``.

    Args:
        html: Text of the task page.
        only_bturls: When true, only the ``bturls`` anchors are collected.

    Returns:
        A list of ``(name, url)`` tuples in page order (anchors first).
    """
    def unescape(text):
        # Decode &name; entities; unknown ones are left as they are.
        return re.sub("&(.*?);", handle_entitydef, text)

    # NOTE: attribute values may be quoted with either ' or ", hence the
    # ["'] character classes.  Triple-quoted raw strings let both quote
    # characters appear without escaping.
    bt_pattern = r"""<a.+?name=["']bturls["'] title=["'](.+?)["'].+?href=["'](http.+?)["']>"""
    task_pattern = r"""<input id=["']durl(\w+?)["'].+title=["'](.+?)["'].+"""
    link_pattern = r"""<input id=["']dl_url%s["'].+value=["'](http.*?)["']"""

    urls = []
    for name, url in re.findall(bt_pattern, html):
        urls.append((unescape(name), unescape(url)))

    if not only_bturls:
        for task_id, name in re.findall(task_pattern, html):
            # task_id only contains \w characters, so it is safe to
            # interpolate into the pattern without re.escape().
            result = re.search(link_pattern % task_id, html)
            if result:
                urls.append((unescape(name), unescape(result.group(1))))

    log.info("Filter get %d links" % len(urls))
    return urls
def choose_download(urls):
    """Ask the user, entry by entry, which files should be downloaded.

    Args:
        urls: Iterable of ``(name, url)`` pairs; only the name is used.

    Returns:
        A dict mapping each name to ``True`` (download) or ``False``
        (skip).  An empty reply counts as "yes"; anything other than
        ``y``/``n`` (case-insensitive, surrounding whitespace ignored)
        re-prompts.
    """
    replies = {"": True, "y": True, "n": False}
    download_list = {}
    for name, _url in urls:
        while True:
            reply = raw_input("Download %s?[Y/n](default: Y) " % name)
            reply = reply.strip().lower()
            if reply in replies:
                download_list[name] = replies[reply]
                break
            sys.stdout.write("please enter y or n!\n")
    return download_list
def thuner_xl_with_wget(urls, output_dir, cookies_file, quiet=False, max_retries=3):
    """Download the selected entries of ``urls`` with wget.

    The user is first asked (via ``choose_download``) which entries to
    fetch.  Entries with an empty URL are logged and skipped.  Each
    remaining entry is fetched with ``wget -c -t 5`` into
    ``output_dir/<name>`` using ``cookies_file`` for authentication.

    Args:
        urls: List of ``(name, url)`` pairs.  The list is not modified.
        output_dir: Directory the files are written to.
        cookies_file: Cookie file passed to ``wget --load-cookies``.
        quiet: When true, ``-q`` is passed to wget.
        max_retries: How many times a failed entry is re-queued before it
            is given up.  Failed entries are retried after all other
            pending entries have been attempted.
    """
    download_list = choose_download(urls)

    # Work on a private queue instead of appending to ``urls`` while
    # iterating it: the caller's list stays intact and retries are bounded.
    pending = []
    for name, url in urls:
        if not url:
            log.debug("Empty Link, Name: " + name)
        elif download_list[name]:
            pending.append((name, url, 0))

    while pending:
        retry_later = []
        for name, url, failures in pending:
            cmd = ["wget", "--load-cookies", cookies_file, "-c", "-t", "5",
                   "-O", os.path.join(output_dir, name), url]
            if quiet:
                cmd.insert(1, "-q")
            log.info('wget cmd: "%s"' % " ".join(cmd))
            ret = subprocess.call(cmd)

            if ret == 0:
                log.info("Finished %s" % name)
                time.sleep(2)
                continue

            log.debug("wget returned %d." % ret)
            # Per the wget manual: 3 = file I/O error, 8 = server issued an
            # error response.  Retrying those is pointless.
            if ret in (3, 8):
                log.error('Give up "%s", may be already finished download, or something wrong with disk.' % name)
            elif failures < max_retries:
                retry_later.append((name, url, failures + 1))
                log.error("will retry for %s later." % name)
            else:
                log.error('Give up "%s" after %d failed attempts.' % (name, failures + 1))
        pending = retry_later
def thuner_xl_with_aria2c(urls, output_dir, cookies_file, quiet=False, max_retries=3):
    """Download the selected entries of ``urls`` with aria2c.

    The user is first asked (via ``choose_download``) which entries to
    fetch.  Entries with an empty URL are logged and skipped.  Each
    remaining entry is fetched with ``aria2c -c -m 5 -s 5`` into
    ``output_dir`` under its name, using ``cookies_file`` for
    authentication.

    Args:
        urls: List of ``(name, url)`` pairs.  The list is not modified.
        output_dir: Directory passed to ``aria2c -d``.
        cookies_file: Cookie file passed to ``aria2c --load-cookies``.
        quiet: When true, ``-q`` is passed to aria2c.
        max_retries: How many times a failed entry is re-queued before it
            is given up.  Failed entries are retried after all other
            pending entries have been attempted.
    """
    download_list = choose_download(urls)

    # Work on a private queue instead of appending to ``urls`` while
    # iterating it: the caller's list stays intact and retries are bounded.
    pending = []
    for name, url in urls:
        if not url:
            log.debug("Empty Link, Name: " + name)
        elif download_list[name]:
            pending.append((name, url, 0))

    while pending:
        retry_later = []
        for name, url, failures in pending:
            cmd = ["aria2c", "--load-cookies", cookies_file, "-d", output_dir,
                   "-c", "-m", "5", "-s", "5", "-o", name, url]
            if quiet:
                cmd.insert(1, "-q")
            log.info('aria2c cmd: "%s"' % " ".join(cmd))
            ret = subprocess.call(cmd)

            if ret == 0:
                log.info("Finished %s" % name)
                time.sleep(2)
                continue

            log.debug("aria2c returned %d." % ret)
            # ``ret in (13)`` tested membership in an int and raised
            # TypeError; 13 is aria2c's "file already existed" exit status.
            if ret == 13:
                log.error('Give up "%s", file already existed.' % name)
            elif failures < max_retries:
                retry_later.append((name, url, failures + 1))
                log.error("the exit status number is %d, and then will retry for %s later." % (ret, name))
            else:
                log.error('Give up "%s" after %d failed attempts (last exit status %d).' % (name, failures + 1, ret))
        pending = retry_later
if __name__ == "__main__":
    # Command-line entry point: parse options, validate the input paths,
    # extract the links from the saved task page and hand them to the
    # selected downloader.
    import argparse

    parser = argparse.ArgumentParser(
        description="Thuner li xian with wget",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument("-p", nargs="?", default="~/user_task.htm", help="load page file")
    parser.add_argument("-c", nargs="?", default="~/cookies.txt", help="load cookie file")
    parser.add_argument("-o", nargs="?", default="~/Downloads", help="output dir")
    parser.add_argument("-b", action="store_true", default=False, help="bt files only")
    parser.add_argument("-q", action="store_true", default=False, help="quiet, only log to file.")
    parser.add_argument("-a", action="store_true", default=False, help="download with aria2c")
    args = parser.parse_args()

    only_bturls, quiet = args.b, args.q
    page_file = os.path.expanduser(args.p)
    cookies_file = os.path.realpath(os.path.expanduser(args.c))
    output_dir = os.path.expanduser(args.o)

    # Assigns the module-level ``log`` that the functions above rely on.
    log = log_init(LOG_FILE, quiet=quiet)

    # Missing inputs are errors: report them as such and exit non-zero so
    # that calling scripts can detect the failure.
    if not os.path.exists(cookies_file):
        log.error("please export cookies file")
        sys.exit(1)
    if not os.path.isdir(output_dir):
        log.error("No such %s", output_dir)
        sys.exit(1)
    if not os.path.isfile(page_file):
        log.error("No such %s", page_file)
        sys.exit(1)

    with open(page_file) as f:
        page_html = f.read()
    urls = collect_urls(page_html, only_bturls)

    if args.a:
        thuner_xl_with_aria2c(urls, output_dir, cookies_file, quiet)
    else:
        thuner_xl_with_wget(urls, output_dir, cookies_file, quiet)
Ubuntu 11.10 使用GVim 遇到的两个问题 | Ubuntu安装遭遇 VMware上的Easy install模式 | 相关资讯:Linux知识
- 时光总是太匆匆!Linux已经诞生23 (08/29/2014 14:12:03)
- Linux虚拟文件系统之文件打开(sys (02/14/2012 11:41:54)
- 2012 年 Linux 峰会时间表 (02/14/2012 06:47:27)
| - 报告称当前 Linux 人才抢手 高薪也 (02/15/2012 06:35:56)
- 解析企业为何选择Linux及其特别之 (02/14/2012 08:17:59)
- Linux禁用字符闪烁的方法 (11/02/2011 10:28:25)
|
本文评论 查看全部评论 (0)