# -*- coding: utf-8 -*-
"""
Created on Sun Oct 01 19:00:35 2017
@author: 莱克巴格
"""
"""First, build the simplest possible download function."""
import urllib2 as u2
# Sample address used when trying the function out by hand.
url = "http://www.baidu.com"


def download(url):
    """Fetch *url* and return the raw response body.

    This is the minimal version: it performs no error handling, so any
    exception raised while opening or reading the URL propagates to the
    caller.

    Args:
        url: Address to fetch.

    Returns:
        The body of the response, as returned by ``read()``.
    """
    response = u2.urlopen(url)
    try:
        return response.read()
    finally:
        # Release the underlying connection even if read() fails.
        response.close()
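"""When the function above hits an error outside our control, urllib2 raises an exception and the script exits."""
"""Below is a more robust version that reports the reason for the failure."""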
import urllib2
# Sample address used when trying the function out by hand.
url = "http://www.baidu.com"


def download(url):
    """Fetch *url*, reporting failures instead of raising them.

    Args:
        url: Address to fetch.

    Returns:
        The response body, or ``None`` when ``urllib2.URLError`` (or a
        subclass of it) was raised while fetching.
    """
    # Single-argument print(...) yields the same output as the Python 2
    # print statement while remaining valid Python 3 syntax.
    print("downloading %s" % url)
    try:
        response = urllib2.urlopen(url)
        try:
            # Return the body itself, not the response object, so the result
            # matches the other download() variants in this file.
            html = response.read()
        finally:
            response.close()
    except urllib2.URLError as e:
        # `reason` is an attribute, not a method.
        print("download ERROR %s" % (e.reason,))
        html = None
    return html
"""Build a download function that can retry failed downloads."""
# Test URL: url = 'http://httpstat.us/500'
import urllib2
def download(url, retry_num=5):
    """Fetch *url*, retrying when the server answers with a 5xx status.

    Args:
        url: Address to fetch.
        retry_num: How many additional attempts may be made after a failure
            whose error carries a ``code`` in the range 500-599.

    Returns:
        The response body, or ``None`` if the final attempt raised
        ``urllib2.URLError`` (or a subclass of it).
    """
    # Single-argument print(...) yields the same output as the Python 2
    # print statement while remaining valid Python 3 syntax.
    print("downloading %s" % url)
    try:
        response = urllib2.urlopen(url)
        try:
            html = response.read()
        finally:
            # Release the connection even if read() fails.
            response.close()
    except urllib2.URLError as e:
        print("DownloadError: %s" % (e.reason,))
        html = None
        # Only errors that expose a 5xx `code` are retried; any other
        # failure falls through and returns None immediately.
        is_server_error = hasattr(e, 'code') and 500 <= e.code < 600
        if retry_num > 0 and is_server_error:
            return download(url, retry_num - 1)
    return html
"""Set the user agent."""
import urllib2
def download(url, user_agent='wswp', retry_num=5):
    """Fetch *url* with a custom User-Agent, retrying on 5xx responses.

    Args:
        url: Address to fetch.
        user_agent: Value sent in the ``User-agent`` request header.
        retry_num: How many additional attempts may be made after a failure
            whose error carries a ``code`` in the range 500-599.

    Returns:
        The response body, or ``None`` if the final attempt raised
        ``urllib2.URLError`` (or a subclass of it).
    """
    # The header name uses a hyphen; an underscore would send a different,
    # unrelated header.
    headers = {'User-agent': user_agent}
    request = urllib2.Request(url, headers=headers)
    while True:
        # Single-argument print(...) yields the same output as the Python 2
        # print statement while remaining valid Python 3 syntax.
        print("downloading %s" % url)
        try:
            # Open the Request object (not the bare URL) so the custom
            # header is actually sent.
            response = urllib2.urlopen(request)
            try:
                return response.read()
            finally:
                # Release the connection even if read() fails.
                response.close()
        except urllib2.URLError as e:
            print("DownloadError: %s" % (e.reason,))
            is_server_error = hasattr(e, 'code') and 500 <= e.code < 600
            if retry_num <= 0 or not is_server_error:
                return None
            # Looping keeps `user_agent` intact and spends the retry budget;
            # the earlier recursive call passed the counter positionally
            # into `user_agent`, which reset the budget on every retry.
            retry_num -= 1