"""Download the numbered JPEG images linked from a static web page.

The script fetches one page, extracts every image URL whose file name is
two digits followed by ``.jpg``, and saves each image as ``<index>.jpg``
inside the output directory.
"""
import os
import re
import urllib.request

# Page that is scraped when the file is run as a script.
PAGE_URL = "http://www.meizitu.com/a/5485.html"

# Encoding used to decode the page; switch to 'utf-8' for UTF-8 pages.
PAGE_ENCODING = 'gbk'

# Directory the images are written to (files are named 0.jpg, 1.jpg, ...).
SAVE_DIR = 'd://1/5485'

# Browser-like User-Agent sent with the image requests.
USER_AGENT = ('Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 '
              '(KHTML, like Gecko) Chrome/36.0.1941.0 Safari/537.36')

# An http URL ending in "/NN.jpg" (NN = two digits).  Quotes, angle brackets
# and whitespace are excluded from the URL body so that a greedy match can
# never run from one HTML attribute into the next; the dot is escaped so
# that only a literal ".jpg" suffix is accepted.
IMAGE_URL_RE = re.compile(r'http://[^\s"\'<>]*/[0-9][0-9]\.jpg')


def getHtml(url):
    """Return the raw bytes served at *url*.

    Args:
        url: Any URL that ``urllib.request.urlopen`` can open.

    Returns:
        The undecoded response body as ``bytes``.

    Raises:
        urllib.error.URLError: If the URL cannot be opened.
    """
    # The context manager closes the response even if read() fails.
    with urllib.request.urlopen(url) as page:
        return page.read()


def getImageUrls(html):
    """Return every image URL found in *html*, in document order.

    Args:
        html: Decoded page source (``str``).

    Returns:
        A list of URL strings matching ``IMAGE_URL_RE``; empty when the
        page contains no matching image.
    """
    return IMAGE_URL_RE.findall(html)


def saveImages(urls, directory=SAVE_DIR):
    """Download each URL in *urls* to ``<directory>/<index>.jpg``.

    Args:
        urls: Iterable of image URLs; the index starts at 0.
        directory: Target directory, created if it does not exist.

    Returns:
        The number of images downloaded.
    """
    os.makedirs(directory, exist_ok=True)

    # urlretrieve() has no headers argument; it goes through the globally
    # installed opener, so the User-Agent is set there once, before the loop.
    opener = urllib.request.build_opener()
    opener.addheaders = [('User-Agent', USER_AGENT)]
    urllib.request.install_opener(opener)

    saved = 0
    for saved, imgurl in enumerate(urls, start=1):
        urllib.request.urlretrieve(imgurl, '%s/%s.jpg' % (directory, saved - 1))
    return saved


def main():
    """Fetch ``PAGE_URL``, download all matching images, report completion."""
    # Only ASCII URLs are extracted, so undecodable bytes elsewhere in the
    # page are replaced instead of aborting the whole run.
    html = getHtml(PAGE_URL).decode(PAGE_ENCODING, errors='replace')
    saveImages(getImageUrls(html))
    print("All Done!")


if __name__ == "__main__":
    main()