Crawl the pages of a specified user and download the images to local files.
import requests
from lxml import etree
from multiprocessing.dummy import Pool  # thread pool; used in the optional sketch at the end
def tosave(texta):
    # Append one line of scraped text to weibo.txt.
    with open('weibo.txt', 'a') as f:
        f.write(texta + '\n')
cook = {"Cookie": "XXXXXX"}
url0 = 'http://weibo.cn/u/XXXXX'
def getContent(url, cook):
    # Fetch one page with the login cookie and parse it.
    html = requests.get(url, cookies=cook).content
    selector = etree.HTML(html)
    # Read the text of each weibo post.
    content = selector.xpath('//span[@class="ctt"]')
    for each in content:
        text = each.xpath('string(.)')
        print(text)
    # Collect the image URLs (xpath copied from Chrome dev tools).
    content = selector.xpath('//*[@class="ib"]/@src')
    # print(content)
    return content
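The tosave helper above is defined but never called. A minimal sketch, assuming you want to persist the scraped posts instead of only printing them, is to call it from the text loop inside getContent:

    # hypothetical variant of getContent's text loop:
    for each in content:
        text = each.xpath('string(.)')
        print(text)
        tosave(text)  # append each post to weibo.txt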
def getDownImg(cons, page):
    # Download every image URL in cons, naming files by page and index.
    x = 0
    for each in cons:
        print(each)
        fn = '%s%s' % (page, x)
        fl = '%s.jpg' % fn
        # Swap the thumbnail path segment for the full-size image.
        url = each.replace('wap180', 'large')
        print(url)
        print(fn)
        r = requests.get(url)
        with open(fl, "wb") as code:
            code.write(r.content)
        x += 1
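getDownImg loads each full image into memory via r.content. A hedged alternative sketch, using the standard stream=True / iter_content API of requests (not part of the original code) to write in chunks and skip failed URLs:

def getDownImgStreamed(cons, page):
    # Hypothetical variant of getDownImg: streams each image to disk
    # in chunks and skips URLs that fail instead of crashing.
    for x, each in enumerate(cons):
        url = each.replace('wap180', 'large')
        try:
            r = requests.get(url, stream=True, timeout=10)
            r.raise_for_status()
        except requests.RequestException as e:
            print('skip %s: %s' % (url, e))
            continue
        with open('%s%s.jpg' % (page, x), 'wb') as f:
            for chunk in r.iter_content(chunk_size=8192):
                f.write(chunk)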
# Crawl the first three pages of the user's timeline.
for page in range(1, 4):
    url = url0 + '?page=' + '%s' % page
    print(url)
    cons = getContent(url, cook)
    getDownImg(cons, page)
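Pool from multiprocessing.dummy is imported at the top but never used. A minimal sketch, assuming the same three pages, that replaces the sequential loop above with a small thread pool (the crawlPage wrapper is hypothetical):

def crawlPage(page):
    # Fetch one page and download its images; suitable as a pool worker.
    url = url0 + '?page=' + '%s' % page
    cons = getContent(url, cook)
    getDownImg(cons, page)

pool = Pool(3)  # three worker threads, one per page
pool.map(crawlPage, range(1, 4))
pool.close()
pool.join()

Because getDownImg names files by page and index, pages downloaded in parallel do not collide on file names.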