页面上的 JS 链接点击不了,所以写了个 Python 脚本:先用 wget 把网页抓下来,再用脚本把链接提取出来整到 BT 上面去吧:
#!/usr/bin/env python
# encoding: utf-8
"""
get-algorithm.py
Created by <zhkzyth@gmail.com> on  6 22, 2013
"""
from BeautifulSoup import BeautifulSoup
import re
import codecs
def main(input_path='data.html', output_path='tmp'):
    """Extract ed2k links from a saved HTML page and write them to a file.

    Parses ``input_path`` with BeautifulSoup, collects the ``href`` of every
    ``<a>`` tag whose ``href`` starts with ``ed2k``, and writes them to
    ``output_path``, one link per line. Both files are handled as UTF-8.

    Args:
        input_path: HTML file to read; defaults to ``data.html``.
        output_path: File to create or overwrite with the extracted links;
            defaults to ``tmp``.
    """
    # The page is only read, so open it read-only; 'r+' would needlessly
    # require write permission on the input file.
    with codecs.open(input_path, 'r', 'utf-8') as f:
        html = f.read()
    soup = BeautifulSoup(html)
    links = soup.findAll('a', href=re.compile("^ed2k.*"))
    # Build whole lines. ``some_list += some_string`` would extend the list
    # one character at a time, which only works by accident after a join.
    lines = [link['href'] + "\n" for link in links]
    with codecs.open(output_path, 'w', 'utf-8') as f:
        f.write("".join(lines))
# Run the extraction only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()  
回复  更多评论