PhantomJS
提供了webpage
模块,用于操作网页。
导入模块,并创建一个实例:
// Load the webpage module first, then build a page instance from it.
var webpage = require('webpage');
var page = webpage.create();
在交互模式下,先导入此模块,再输入page
,会打印出此对象的详细构造:
{
"addCookie": "[Function]",
"canGoBack": false,
"canGoForward": false,
"childFramesCount": "[Function]",
"childFramesName": "[Function]",
"clearCookies": "[Function]",
"clearMemoryCache": "[Function]",
"clipRect": {
"height": 0,
"left": 0,
"top": 0,
"width": 0
},
"close": "[Function]",
"closing": "[Function]",
"content": "",
"cookieJar": {
"objectName": "",
"cookies": [],
"destroyed(QObject*)": "[Function]",
"destroyed()": "[Function]",
"objectNameChanged(QString)": "[Function]",
"deleteLater()": "[Function]",
"addCookie(QVariantMap)": "[Function]",
"addCookieFromMap(QVariantMap,QString)": "[Function]",
"addCookieFromMap(QVariantMap)": "[Function]",
"addCookiesFromMap(QVariantList,QString)": "[Function]",
"addCookiesFromMap(QVariantList)": "[Function]",
"cookiesToMap(QString)": "[Function]",
"cookiesToMap()": "[Function]",
"cookieToMap(QString,QString)": "[Function]",
"cookieToMap(QString)": "[Function]",
"deleteCookie(QString,QString)": "[Function]",
"deleteCookie(QString)": "[Function]",
"clearCookies()": "[Function]",
"close()": "[Function]"
},
"cookies": [],
"currentFrameName": "[Function]",
"customHeaders": {},
"deleteCookie": "[Function]",
"evaluateJavaScript": "[Function]",
"finish": "[Function]",
"focusedFrameName": "",
"frameContent": "",
"frameName": "",
"framePlainText": "",
"frameTitle": "",
"frameUrl": "",
"framesCount": 0,
"framesName": [],
"getPage": "[Function]",
"go": "[Function]",
"goBack": "[Function]",
"goForward": "[Function]",
"handleCurrentFrameDestroyed": "[Function]",
"handleRepaintRequested": "[Function]",
"handleUrlChanged": "[Function]",
"initialized": "[Function]",
"injectJs": "[Function]",
"javaScriptAlertSent": "[Function]",
"javaScriptConsoleMessageSent": "[Function]",
"javaScriptErrorSent": "[Function]",
"libraryPath": "/Users/leleliu008/git/document",
"loadFinished": "[Function]",
"loadStarted": "[Function]",
"loading": false,
"loadingProgress": 0,
"navigationLocked": false,
"navigationRequested": "[Function]",
"offlineStoragePath": "/Users/leleliu008/Library/Application Support/Ofi Labs/PhantomJS",
"offlineStorageQuota": 5242880,
"openUrl": "[Function]",
"ownsPages": true,
"pages": [],
"pagesWindowName": [],
"paperSize": {},
"plainText": "",
"rawPageCreated": "[Function]",
"release": "[Function]",
"reload": "[Function]",
"render": "[Function]",
"renderBase64": "[Function]",
"repaintRequested": "[Function]",
"resourceError": "[Function]",
"resourceReceived": "[Function]",
"resourceRequested": "[Function]",
"resourceTimeout": "[Function]",
"scrollPosition": {
"left": 0,
"top": 0
},
"sendEvent": "[Function]",
"setContent": "[Function]",
"setCookieJar": "[Function]",
"setCookieJarFromQObject": "[Function]",
"setCookies": "[Function]",
"setProxy": "[Function]",
"setupFrame": "[Function]",
"stop": "[Function]",
"stopJavaScript": "[Function]",
"switchToChildFrame": "[Function]",
"switchToFocusedFrame": "[Function]",
"switchToFrame": "[Function]",
"switchToMainFrame": "[Function]",
"switchToParentFrame": "[Function]",
"title": "",
"updateLoadingProgress": "[Function]",
"url": "",
"urlChanged": "[Function]",
"viewportSize": {
"height": 300,
"width": 400
},
"windowName": "",
"zoomFactor": 1
}
GET
请求给定的网址。
callback
的原型是function(String status)
,status
只有success
和fail两种取值。
示例:
// Open a URL with the default request and quit PhantomJS once the load
// callback has run.
var webpage = require('webpage');
var page = webpage.create();

// Load callback: receives the load result as a status string.
function handleLoad(status) {
    if (status === 'success') {
        //TODO
    }
    // Exit regardless of the outcome so the process does not linger.
    phantom.exit();
}

page.open('http://slashdot.org', handleLoad);
指定使用的HTTP请求方式。
method
是请求方法,比如:GET
、POST
、PUT
、DELETE
、HEAD
等。
body
是请求体的内容(form)。
callback
的原型是function(String status)
,status
只有success
和fail两种取值。
示例:
// Open a URL with an explicit HTTP method and a form-encoded request body,
// then quit PhantomJS once the load callback has run.
var webpage = require('webpage');
var page = webpage.create();

var targetUrl = 'http://slashdot.org';
var httpMethod = 'POST';
var formBody = 'xxx=xxxx&yyyy=yyyy';

// Load callback: receives the load result as a status string.
function handleLoad(status) {
    if (status === 'success') {
        //TODO
    }
    // Exit regardless of the outcome so the process does not linger.
    phantom.exit();
}

page.open(targetUrl, httpMethod, formBody, handleLoad);
详细定制请求。
callback
的原型是function(String status)
,status
只有success
和fail两种取值。
options
对象包含下面的字段:
字段 | 类型 | 说明 |
---|---|---|
operation | String | 请求方法 |
encoding | String | 编码格式 |
headers | Object | 请求头 |
data | String | 请求体 |
示例:
// Open a URL with a fully customised request described by an options
// object (operation, encoding, headers, data), then quit PhantomJS.
var page = require('webpage').create();

// Request body: a JSON document serialised to a string.
var jsonPayload = JSON.stringify({
    some: "data",
    another: ["custom", "data"]
});

// Request headers announcing the JSON body.
var requestHeaders = {
    "Content-Type": "application/json"
};

var options = {
    operation: "POST",
    encoding: "utf8",
    headers: requestHeaders,
    data: jsonPayload
};

// Load callback: receives the load result as a status string.
function handleLoad(status) {
    if (status === 'success') {
        //TODO
    }
    // Exit regardless of the outcome so the process does not linger.
    phantom.exit();
}

page.open('http://slashdot.org', options, handleLoad);
该方法用于在网页中加载外部脚本。比如,网页本身没有引入jQuery,而你想使用jQuery操作DOM,那么就可以使用该方法加载jQuery。
示例:
// Open a page, load an external script (jQuery) into it with includeJs,
// and quit PhantomJS when done.
var page = require('webpage').create();
page.open('http://slashdot.org', function(status) {
    // Bail out early when the page itself could not be loaded.
    if (status !== 'success') {
        console.log('fail');
        phantom.exit();
        return;
    }

    // The method is spelled includeJs (lower-case "s"); page.includeJS is
    // undefined, so calling it throws a TypeError.
    // The callback only signals that the script has finished loading. It is
    // not a Node-style error-first callback, so its argument must not be
    // tested as an error.
    page.includeJs('http://path/to/jquery.min.js', function() {
        //TODO
        phantom.exit();
    });
});
该方法用于打开网页以后,在页面中执行JavaScript代码。
示例1:
// Open a page, load jQuery into it, then run code inside the page context
// that clicks the page's buttons.
var page = require('webpage').create();
page.open('http://slashdot.org', function(status) {
    // Bail out early when the page itself could not be loaded.
    if (status !== 'success') {
        console.log('fail');
        phantom.exit();
        return;
    }

    // The method is spelled includeJs (lower-case "s"); page.includeJS is
    // undefined, so calling it throws a TypeError.
    // The callback only signals that the script has finished loading. It is
    // not a Node-style error-first callback, so its argument must not be
    // tested as an error.
    page.includeJs('http://path/to/jquery.min.js', function() {
        // The function passed to evaluate runs inside the web page, where
        // the freshly loaded jQuery ($) is available. It returns nothing,
        // so the result of evaluate is not stored.
        page.evaluate(function() {
            $("button").click();
        });
        phantom.exit();
    });
});
codeBlock
函数如果有返回值,那么会被page.evaluate(Function codeBlock)
返回。
示例2:
// Open a page, read document.title from inside the page context and print
// it on the PhantomJS side.
var webpage = require('webpage');
var page = webpage.create();

// Runs inside the web page; its return value is handed back by evaluate.
function readTitle() {
    return document.title;
}

// Load callback: receives the load result as a status string.
function handleLoad(status) {
    if (status === 'success') {
        var title = page.evaluate(readTitle);
        console.log('title = ' + title);
    }
    // Exit regardless of the outcome so the process does not linger.
    phantom.exit();
}

page.open('http://slashdot.org', handleLoad);
网页内部的console语句,以及调用page.evaluate(Function codeBlock)
方法时候的codeBlock
函数内部的console
语句,默认不会显示在命令行。
我们需要使用此回调函数,把msg
打印出来。
这是一个回调函数;既然是回调函数,就需要由我们为它赋值。
示例:
// Forward console output produced inside the web page to the PhantomJS
// command line, then quit.
var page = require('webpage').create();

// Assign the handler before opening the page so no message is missed.
page.onConsoleMessage = function(msg) {
    console.log(msg);
};

page.open('http://slashdot.org', function(status) {
    if (status === 'success') {
        // This console.log runs inside the page; it reaches the terminal
        // only through the onConsoleMessage handler above.
        page.evaluate(function() {
            console.log('title = ' + document.title);
        });
    } else {
        console.log('fail');
    }
    // Exit on both paths: the original never called phantom.exit() on
    // success, which left the PhantomJS process running forever.
    phantom.exit();
});
视口的大小。
此对象包含width
和height
两个属性。
注意:在page.open()
方法之前进行设置才有效。
初始界面的缩放因子,范围在[0, 1]
。默认是1
。
注意:在page.open()
方法之前进行设置才有效。
将网页保存成图片。该方法根据后缀名,将网页保存成不同的格式, 目前支持PNG
、GIF
、JPEG
和PDF
。
示例1:
// Render a page to an image file; the output format is taken from the
// file name's extension.
var webpage = require('webpage');
var page = webpage.create();

// Set the viewport before opening the page.
var viewport = {
    width: 1920,
    height: 1080
};
page.viewportSize = viewport;

// Load callback: renders on success, reports the status, then exits.
function handleLoad(status) {
    if (status === 'success') {
        page.render('slashdot.org.png');
    }
    console.log(status);
    phantom.exit();
}

page.open('http://slashdot.org', handleLoad);
示例2:
// Render a zoomed page to a JPEG file with an explicit format and quality.
var page = require('webpage').create();

// Viewport and zoom factor are set before the page is opened.
page.viewportSize = {
    width: 1920,
    height: 1080
};
page.zoomFactor = 0.75;

page.open('http://slashdot.org', function(status) {
    if (status === 'success') {
        // The explicit format option forces JPEG output, so the file is
        // named .jpg; the original wrote JPEG data into a file called
        // 'slashdot.org.png', leaving extension and content mismatched.
        page.render('slashdot.org.jpg', { format: 'jpeg', quality: '100' });
    }
    console.log(status);
    phantom.exit();
});
当页面请求一个资源时,会触发这个回调函数。
当网页收到所请求的资源时,就会执行该回调函数。