Python: meizitu image downloader

import os
import re
import urllib.request
 3 
 4 # ------ 获取网页源代码的方法 ---
 5 def getHtmlurl):
 6     page = urllib.request.urlopenurl)
 7     html = page.read)
 8     return html
 9 
# ------ Pass any static page URL to getHtml() ------
html = getHtml("http://www.meizitu.com/a/5485.html")
# ------ Decode the downloaded bytes to text; this page is decoded as gbk
# ------ (switch to 'utf-8' for pages that use it) ------
html = html.decode('gbk')

# ------ Collect every image address on the page ------
# Match absolute http:// URLs whose file name is two digits followed by
# ".jpg". The URL body excludes whitespace, double quotes and angle brackets
# so a match cannot run past the end of an HTML attribute, and the dot is
# escaped so it only matches a literal ".".
reg = r'http://[^\s"<>]*/[0-9][0-9]\.jpg'
mmurl = re.findall(reg, html)

# urlretrieve() goes through the globally installed opener, so build and
# install it once up front instead of on every loop iteration. addheaders
# must be a list of (name, value) tuples.
opener = urllib.request.build_opener()
opener.addheaders = [('User-Agent', 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/36.0.1941.0 Safari/537.36')]
urllib.request.install_opener(opener)

# urlretrieve() does not create missing directories; make sure the target
# folder exists before the first download.
os.makedirs('d://1/5485', exist_ok=True)

# Save each image as 0.jpg, 1.jpg, ... in the order it was found on the page.
for x, imgurl in enumerate(mmurl):
    urllib.request.urlretrieve(imgurl, 'd://1/5485/%s.jpg' % x)

print("All Done!")

Published by

风君子

独自遨游何稽首 揭天掀地慰生平