参考 http://www.wespoke.com/archives/000978.html ,用 Python 重写了一下,可以通过 -t 指定下载线程数,通过 -d 指定保存路径。
#! /usr/bin/python
import urllib, re,sys,os,os.path,getopt
threadNum = 20
savePath = "mp3-2"
optlist,left = getopt.getopt(sys.argv[1:], "t:d:")
for opt in optlist:
    print opt
    if (opt[0]=='-t'):
        threadNum = int(opt[1])
    if (opt[0]=='-d'):
        savePath = opt[1]
print "threadnum="+str(threadNum)
print "savePath="+savePath
if (not os.path.exists(savePath)):
    os.makedirs(savePath)
base = "http://list.mp3.baidu.com/topso/"
url = "http://list.mp3.baidu.com/topso/mp3topsong.html"
def getUrlData(url, retries=3):
    """Fetch ``url`` and return its content as a list of lines.

    The download is attempted up to ``retries`` times (3 by default).
    If every attempt fails an empty list is returned, so callers can
    iterate over the result without checking for errors first.
    """
    for _ in range(retries):
        try:
            f = urllib.urlopen(url)
            try:
                return f.readlines()
            finally:
                # Close the handle even when reading fails part-way.
                f.close()
        except Exception:
            # Best effort: any fetch error just triggers the next attempt.
            # Catching Exception rather than using a bare except lets
            # KeyboardInterrupt/SystemExit propagate (Python 2.5+), so
            # Ctrl-C stops the script instead of causing a retry.
            pass
    return []
   
data = getUrlData(url)
pattern = re.compile( r'href="(.*?tsomp3.htm)' )
target = [];
for line in data:
    if ( line.find( "tsomp3.htm" )!=-1 ):
        items = pattern.findall( line )
        for item in items:
            target.append( item )
           
print "find ",len( target )," mp3 "
mp3Pattern = re.compile( r'href="(.*?\.mp3)"' )
titlePattern = re.compile( r'<title>.*?_(.*?)\s+</title>' )
import threading
lock = threading.Lock()
def getMp3():
    while True:
        t = ""
        lock.acquire()
        if ( len( target )>0 ):
            t = target[0]
            target.remove( t )
        else :
            return
        lock.release()
        tempUrl = base+t
        data = getUrlData(tempUrl)
        mp3Target = []
        title = "";
        for line in data:
            if ( line.find( "title" )!=-1 ):
                m = titlePattern.search( line )
                if ( m ):
                    title = m.group( 1 )
                    break
        for line in data:
            if ( len( mp3Target )>10 ):
                break
            if ( line.find( ".mp3" )!=-1 ):
                items = mp3Pattern.findall( line )
                for item in items:
                    mp3Target.append( item )
        filename = savePath+"/"+title+".mp3"
        for t in mp3Target:
            try :
                print "try to get "+title+".mp3,url=",t
                ret = urllib.urlretrieve( t, filename )
                size = os.path.getsize(filename)
                if (size>500*1024):
                    print "done:"+title+".mp3"
                    break
            except :
                print "fail to get "+title+".mp3 with url "+t
                pass
for num in range(threadNum):
    thread = threading.Thread( None, getMp3 )
    thread.start()
    print "start thread ",num
   

文章来源:http://spaces.msn.com/members/zzzhc/Blog/cns!1pPbKg7hHgS7AKKQm6CWG1ZQ!125.entry