Crawling Weibo Images with Python

Crawl a given user's Weibo pages and download the images they contain to local disk.

import requests
from lxml import etree

def tosave(texta):
    # Append one line of weibo text to a local file
    with open('weibo.txt', 'a', encoding='utf-8') as f:
        f.write(texta + '\n')
# Cookie of a logged-in session (copy it from your browser) and the target user's page
cook = {"Cookie": "XXXXXX"}
url0 = 'http://weibo.cn/u/XXXXX'
def getContent(url, cook):
    html = requests.get(url, cookies=cook).content
    selector = etree.HTML(html)
    # extract the text of each weibo post
    content = selector.xpath('//span[@class="ctt"]')
    for each in content:
        text = each.xpath('string(.)')
        print(text)
        tosave(text)  # save the text locally as well
    # extract the image thumbnail URLs (XPath copied from Chrome's inspector)
    content = selector.xpath('//*[@class="ib"]/@src')
    return content
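
The string(.) XPath call flattens a node and all of its children into one plain string, which is what lets the loop above grab a post's full text even when links or other tags are nested inside the span. A minimal self-contained demo; the HTML snippet is made up for illustration:

from lxml import etree

# hypothetical snippet shaped like a weibo post: text with a nested link
snippet = '<span class="ctt">hello <a href="#">world</a>!</span>'
sel = etree.HTML(snippet)
for node in sel.xpath('//span[@class="ctt"]'):
    # string(.) concatenates all descendant text: prints "hello world!"
    print(node.xpath('string(.)'))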

def getDownImg(cons, page):
    # download every image URL in cons, naming files <page><index>.jpg
    x = 0
    for each in cons:
        print(each)
        fn = '%s%s' % (page, x)
        fl = '%s.jpg' % fn
        # request the full-size image instead of the wap thumbnail
        url = each.replace('wap180', 'large')
        print(url)
        print(fn)
        r = requests.get(url)
        with open(fl, 'wb') as code:
            code.write(r.content)
        x += 1
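
requests.get without a timeout can hang forever, and one failed image request would abort the whole loop. A hedged variant of the download step; the timeout value and the skip-on-error policy are my own choices, not from the original:

import requests

def download_image(url, filename):
    # fetch one image, skipping it on network errors instead of crashing the run
    try:
        r = requests.get(url, timeout=10)
        r.raise_for_status()
    except requests.RequestException as e:
        print('skip %s: %s' % (url, e))
        return False
    with open(filename, 'wb') as f:
        f.write(r.content)
    return True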

for page in range(1, 4):
    # crawl pages 1-3 of the user's weibo list
    url = url0 + '?page=%s' % page
    print(url)
    cons = getContent(url, cook)
    getDownImg(cons, page)
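
Since each page is handled independently, the page loop is also easy to parallelize with a thread pool. A minimal sketch using multiprocessing.dummy.Pool (a thread pool, despite the module name); the crawl_page wrapper and the pool size of 4 are assumptions:

from multiprocessing.dummy import Pool  # thread pool, despite the module name

def crawl_page(page):
    # hypothetical wrapper: fetch one page and download its images
    url = url0 + '?page=%s' % page
    cons = getContent(url, cook)
    getDownImg(cons, page)

pool = Pool(4)  # 4 worker threads, an assumed value
pool.map(crawl_page, range(1, 4))
pool.close()
pool.join()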