最近工作上有一个需求: 需要获取 http://weibo.com/at/weibo 的数据, 也就是 @我自己 的数据. 由于没有可用的接口, 只能通过模拟登录后抓取页面来获取. 下面贴出部分代码:
<?php
/**
 * Simulates a Sina Weibo login with cURL so that pages which require a
 * logged-in session can be fetched.
 *
 * by CJ ( http://www.summerbluet.com )
 */

/** Project root path; the cookie file is stored under COOKIE_PATH. */
define('PROJECT_ROOT_PATH', dirname(__FILE__));
define('COOKIE_PATH', PROJECT_ROOT_PATH);

// Shared timestamp.
define('TIMESTAMP', time());

// Enable when troubleshooting: weiboLogin::debug() then writes "<name>.txt"
// dump files (relative path, i.e. into the current working directory).
define('DEBUG', false);

/** Sina account used for the simulated login. */
$username = "";
$password = "";

/* Fire Up */
// The class below is declared unconditionally at top level, so PHP makes it
// available before this code runs even though it appears later in the file.
try {
    $weiboLogin = new weiboLogin($username, $password);
    exit($weiboLogin->showTestPage('http://weibo.com/at/comment'));
} catch (Exception $e) {
    // Same user-visible result as before: print the failure message and stop.
    exit($e->getMessage());
}

/**
 * Cookie-based Sina Weibo login helper built on cURL.
 *
 * Cookies are persisted in a per-account file under COOKIE_PATH, so a
 * session obtained by doSinaLogin() is reused by later requests.
 */
class weiboLogin
{
    /** Path of the cURL cookie jar for this account. */
    private $cookiefile;

    private $username;

    private $password;

    /**
     * @param string $username Sina account name
     * @param string $password Sina account password
     *
     * @throws InvalidArgumentException when either credential is empty
     */
    public function __construct($username, $password)
    {
        if ((string) $username === '' || (string) $password === '') {
            throw new InvalidArgumentException("请填写用户名密码");
        }

        // base64 output may contain '+' and '/'; a '/' would turn the file
        // name into a sub-path, so both are mapped to filename-safe characters.
        // Names are unchanged for usernames that never produced those characters.
        $suffix = strtr(substr(base64_encode($username), 0, 10), '+/', '-_');

        $this->cookiefile = COOKIE_PATH . '/cookie_sina_' . $suffix;
        $this->username = $username;
        $this->password = $password;
    }

    /**
     * Performs a cURL request, sharing cookies through the account cookie file.
     *
     * @param string      $url  Request URL
     * @param array|false $data POST payload; when falsy a GET request is sent
     *
     * @return string|bool Response body as returned by curl_exec()
     *
     * @throws RuntimeException when cURL reports an error
     */
    public function curlRequest($url, $data = false)
    {
        $ch = curl_init();
        $option = array(
            CURLOPT_URL => $url,
            CURLOPT_HEADER => 0,
            CURLOPT_HTTPHEADER => array(
                'Accept-Language: zh-cn',
                'Connection: Keep-Alive',
                'Cache-Control: no-cache',
            ),
            CURLOPT_USERAGENT => "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/21.0.1180.79 Safari/537.1",
            CURLOPT_FOLLOWLOCATION => true,
            CURLOPT_MAXREDIRS => 4,
            CURLOPT_RETURNTRANSFER => true,
            CURLOPT_COOKIEJAR => $this->cookiefile,
            CURLOPT_COOKIEFILE => $this->cookiefile,
        );

        if ($data) {
            $option[CURLOPT_POST] = 1;
            $option[CURLOPT_POSTFIELDS] = $data;
        }

        curl_setopt_array($ch, $option);
        $response = curl_exec($ch);

        if (curl_errno($ch) > 0) {
            // Read the error text before releasing the handle.
            $message = "CURL ERROR:$url " . curl_error($ch);
            curl_close($ch);
            throw new RuntimeException($message);
        }

        curl_close($ch);

        return $response;
    }

    /**
     * Simulates the Sina SSO login: fetch a prelogin ticket, post the
     * credentials, then follow the redirect URL found in the login response.
     *
     * @return array|null Decoded feedBackUrlCallBack payload, or null when the
     *                    final response did not contain one
     *
     * @throws RuntimeException when the ticket or the login step fails
     */
    public function doSinaLogin()
    {
        // Step 1 : Get tickit
        // NOTE(review): this request sends base64(username) while the login POST
        // below sends base64(urlencode(username)); kept as in the original,
        // confirm against the SSO protocol.
        $preLoginData = $this->curlRequest(
            'http://login.sina.com.cn/sso/prelogin.php?entry=weibo&callback=sinaSSOController.preloginCallBack&su='
            // base64 may contain '+', '/' and '=', which must be escaped inside a query string.
            . rawurlencode(base64_encode($this->username))
            . '&client=ssologin.js(v1.3.16)'
        );

        $preArr = array();
        if (!preg_match('/sinaSSOController\.preloginCallBack\((.*)\)/', (string) $preLoginData, $preArr)) {
            $this->debug('debug_1_Tickit', $preLoginData);
            throw new RuntimeException('Server tickit fail');
        }

        $jsonArr = json_decode($preArr[1], true);
        $this->debug('debug_1_Tickit', $preArr[1]);

        if (!is_array($jsonArr) || !isset($jsonArr['servertime'], $jsonArr['nonce'])) {
            throw new RuntimeException('Server tickit fail');
        }

        // Step 2 : Do Certification
        $postArr = array(
            'entry' => 'weibo',
            'gateway' => 1,
            'from' => '',
            'vsnval' => '',
            'savestate' => 7,
            'useticket' => 1,
            'ssosimplelogin' => 1,
            'su' => base64_encode(urlencode($this->username)),
            'service' => 'miniblog',
            'servertime' => $jsonArr['servertime'],
            'nonce' => $jsonArr['nonce'],
            'pwencode' => 'wsse',
            // Triple SHA-1 of the password salted with the server-issued time and nonce.
            'sp' => sha1(sha1(sha1($this->password)) . $jsonArr['servertime'] . $jsonArr['nonce']),
            'encoding' => 'UTF-8',
            'url' => 'http://weibo.com/ajaxlogin.php?framelogin=1&callback=parent.sinaSSOController.feedBackUrlCallBack',
            'returntype' => 'META',
        );

        $loginData = $this->curlRequest(
            'http://login.sina.com.cn/sso/login.php?client=ssologin.js(v1.3.19)',
            $postArr
        );
        $this->debug('debug_2_Certification_raw', $loginData);

        // Step 3 : SSOLoginState
        if (!$loginData) {
            throw new RuntimeException('Login sina fail.');
        }

        // The login page redirects via location.replace('<url>'); accept either quote style.
        $matchs = array();
        if (!preg_match('/replace\(([\'"])(.*?)\1\)/', $loginData, $matchs)) {
            throw new RuntimeException('Login sina fail.');
        }
        $redirectUrl = $matchs[2];
        $this->debug('debug_3_Certification_result', $redirectUrl);

        $loginResult = $this->curlRequest($redirectUrl);

        $userInfo = null;
        $loginResultArr = array();
        if (preg_match('/feedBackUrlCallBack\((.*?)\)/', (string) $loginResult, $loginResultArr)) {
            $userInfo = json_decode($loginResultArr[1], true);
            $this->debug('debug_4_UserInfo', $loginResultArr[1]);
        }

        return $userInfo;
    }

    /**
     * Fetches a page, logging in first when the response does not look like a
     * logged-in page. Serves as a usage example for the class.
     *
     * @param string $url Page to fetch
     *
     * @return string|bool Page body as returned by curlRequest()
     */
    public function showTestPage($url)
    {
        $file_holder = $this->curlRequest($url);

        // Not logged in yet: log in, then try again.
        // strpos() returns 0 for a match at the very start, so compare against false explicitly.
        $isLogin = strpos((string) $file_holder, 'class="user_name"') !== false;
        if (!$isLogin) {
            $this->doSinaLogin();
            $file_holder = $this->curlRequest($url);
        }

        return $file_holder;
    }

    /**
     * Debug helper: when DEBUG is on, dumps $data into "<file_name>.txt".
     *
     * @param string $file_name Dump file name without extension
     * @param mixed  $data      Content to write
     */
    public function debug($file_name, $data)
    {
        if (DEBUG) {
            file_put_contents($file_name . '.txt', $data);
        }
    }
}
本文详细介绍了如何使用PHP模拟登录到微博网站,并通过代码示例展示了如何抓取@自己的数据,包括登录流程、模拟登录类的实现及测试方法。

1406

被折叠的 条评论
为什么被折叠?



