淘先锋技术网

首页 1 2 3 4 5 6 7

# -*- coding: utf-8 -*-

"""
Created on Sun Oct 01 19:00:35 2017

@author: 莱克巴格
"""

"""首先构建最简单的下载函数"""

import urllib2 as u2
# Example target URL.
url = "http://www.baidu.com"
def download(url):
    """Fetch ``url`` and return the raw response body.

    No exceptions are caught here: any error raised while opening or
    reading the URL propagates to the caller.
    """
    return u2.urlopen(url).read()

"""该函数在遇到一些无法控制的错误时,urllib2 会抛出异常,退出脚本"""

"""下面是一个更加健壮的脚本 可以显示错误代码以及原因"""
import urllib2
# Example target URL.
url = "http://www.baidu.com"
def download(url):
    """Open ``url`` and return the response, or ``None`` on failure.

    Prints the URL being fetched. If ``urllib2.URLError`` is raised, the
    error's reason is printed and ``None`` is returned instead of letting
    the exception propagate.
    """
    print("downloading %s" % url)
    try:
        # NOTE(review): this is the response object, not its body; call
        # .read() on the result to get the page content.
        html = urllib2.urlopen(url)
    except urllib2.URLError as e:
        # ``reason`` is an attribute, not a method. The 1-tuple keeps the
        # %-formatting correct when ``reason`` is itself a tuple-like error.
        print("download ERROR %s" % (e.reason,))
        html = None
    return html

"""构建可以重试下载的download函数"""

# 测试地址: url = 'http://httpstat.us/500'

import urllib2
def download(url, retry_num=5):
    """Fetch ``url`` and return its body, retrying on 5xx server errors.

    Args:
        url: Address to fetch.
        retry_num: How many more attempts may be made after a failure
            whose error carries a ``code`` in the 500-599 range.

    Returns:
        The response body, or ``None`` if the download failed and was not
        (or could no longer be) retried.
    """
    print("downloading %s" % url)
    try:
        html = urllib2.urlopen(url).read()
    except urllib2.URLError as e:
        # The 1-tuple keeps the %-formatting correct when ``reason`` is
        # itself a tuple-like error object.
        print("DownloadError: %s" % (e.reason,))
        html = None
        # Only errors that expose a 5xx ``code`` are retried; anything
        # else falls through and returns None.
        if retry_num > 0 and hasattr(e, 'code') and 500 <= e.code < 600:
            return download(url, retry_num - 1)
    return html

"""设置用户代理"""
import urllib2
def download(url, user_agent='wswp', retry_num=5):
    """Fetch ``url`` with a custom User-Agent, retrying on 5xx errors.

    Args:
        url: Address to fetch.
        user_agent: Value sent in the ``User-agent`` request header.
        retry_num: How many more attempts may be made after a failure
            whose error carries a ``code`` in the 500-599 range.

    Returns:
        The response body, or ``None`` if the download failed and was not
        (or could no longer be) retried.
    """
    print("downloading %s" % url)
    headers = {'User-agent': user_agent}
    request = urllib2.Request(url, headers=headers)

    try:
        # Open the Request object (not the bare URL) so the custom header
        # is actually sent.
        html = urllib2.urlopen(request).read()
    except urllib2.URLError as e:
        # The 1-tuple keeps the %-formatting correct when ``reason`` is
        # itself a tuple-like error object.
        print("DownloadError: %s" % (e.reason,))
        html = None
        if retry_num > 0 and hasattr(e, 'code') and 500 <= e.code < 600:
            # Pass user_agent through explicitly; passing the counter as
            # the second positional argument would overwrite user_agent
            # and reset retry_num to its default on every retry.
            return download(url, user_agent, retry_num - 1)
    return html