一个简单的例子：抓取一个页面中的所有图片链接，并返回一个列表。

#! /usr/bin/env python
# -*- coding: utf8 -*-

import re
import urllib


def get_html(url):
    """Download ``url`` and return the complete response body.

    Args:
        url: Address of the page to fetch; handed unchanged to
            ``urllib.urlopen``.

    Returns:
        The result of ``read()`` on the opened response, i.e. the whole body.

    Any error raised while opening or reading the URL propagates to the
    caller unchanged; nothing is caught here.
    """
    # Function-scope import so this block does not depend on the file header.
    from contextlib import closing

    # closing() releases the connection deterministically, even if read()
    # raises, instead of leaving the response for the garbage collector.
    with closing(urllib.urlopen(url)) as response:
        return response.read()


# Matches a ``src="....jpg"`` attribute that is immediately followed by a
# ``pic_ext`` attribute. The captured value may not contain a double quote
# (or a newline), so a match can never start at one ``src`` attribute and
# run through the closing quote into a later tag.
_IMG_SRC_PATTERN = re.compile(r'src="([^"\n]+?\.jpg)" pic_ext')


def get_img(html):
    """Return the ``.jpg`` image links found in ``html``.

    Only ``src="..."`` values that end in ``.jpg`` and are directly followed
    by `` pic_ext`` are collected.

    Args:
        html: Page markup to scan, as a string.

    Returns:
        A list of the captured ``src`` values in order of appearance; an
        empty list when nothing matches.
    """
    return _IMG_SRC_PATTERN.findall(html)


# Download the sample thread page once; `html` holds its body.
html = get_html("http://tieba.baidu.com/p/2460150866")

# Emit every extracted image link on its own line as one joined string.
print("\n".join(get_img(html)))

输出结果如下:

http://imgsrc.baidu.com/forum/w%3D580/sign=750661a0fcfaaf5184e381b7bc5594ed/75fafbedab64034fc3ed0b80aec379310a551d11.jpg
http://imgsrc.baidu.com/forum/w%3D580/sign=114c0f68b58f8c54e3d3c5270a282dee/3d4e78f0f736afc3c72cf6e3b219ebc4b7451211.jpg
http://imgsrc.baidu.com/forum/w%3D580/sign=3b5751825882b2b7a79f39cc01accb0a/9ac37d1ed21b0ef481c651d8dcc451da81cb3e0f.jpg
http://imgsrc.baidu.com/forum/w%3D580/sign=26c16593bd315c6043956be7bdb0cbe6/894443a98226cffcd8287634b8014a90f603ea3c.jpg
http://imgsrc.baidu.com/forum/w%3D580/sign=8905fff979f0f736d8fe4c093a54b382/08d2d539b6003af318e615e6342ac65c1138b6dd.jpg
http://imgsrc.baidu.com/forum/w%3D580/sign=c6335d3d86d6277fe912323018391f63/9dcd7cd98d1001e97833dd89b90e7bec54e7972e.jpg
http://imgsrc.baidu.com/forum/w%3D580/sign=0d3b26024bed2e73fce98624b700a16d/0faccbef76094b3697eed26ba2cc7cd98d109d3d.jpg
http://imgsrc.baidu.com/forum/w%3D580/sign=a6b7ab9a10dfa9ecfd2e561f52d1f754/48c7a7efce1b9d16a99e53e7f2deb48f8d5464d8.jpg
http://imgsrc.baidu.com/forum/w%3D580/sign=4fb535a17e3e6709be0045f70bc69fb8/50071d950a7b020893c7f66763d9f2d3572cc828.jpg
http://imgsrc.baidu.com/forum/w%3D580/sign=e87250192fdda3cc0be4b82831e83905/07dab6fd5266d016aa6057ae962bd40735fa3538.jpg
http://imgsrc.baidu.com/forum/w%3D580/sign=ac526d39a1ec08fa260013af69ef3d4d/8a8e8c5494eef01f6178d143e1fe9925bd317dcd.jpg
http://imgsrc.baidu.com/forum/w%3D580/sign=808342bf6a63f6241c5d390bb745eb32/f2be6c81800a19d8cea24d8332fa828ba71e4690.jpg