|
|
|
@ -10,427 +10,445 @@ use Illuminate\Support\Facades\Redis; |
|
|
|
use Throwable; |
|
|
|
|
|
|
|
// Scraping runs can be long and memory-hungry: lift the execution time limit and the memory ceiling.
set_time_limit(0);
ini_set('memory_limit', '-1');

// Make UTC the default timezone for the date()/strtotime() calls in this file.
date_default_timezone_set('UTC');
|
|
|
|
|
|
|
class WeiboService |
|
|
|
{ |
|
|
|
private $files = []; |
|
|
|
private $videoDir = "/Volumes/T7/Image/weibo/video/"; |
|
|
|
private $imageDir = "/Volumes/T7/Image/weibo/image/"; |
|
|
|
class WeiboService { |
|
|
|
private $files = []; |
|
|
|
private $videoDir = "/Volumes/T7/Image/weibo/video/"; |
|
|
|
private $imageDir = "/Volumes/T7/Image/weibo/image/"; |
|
|
|
|
|
|
|
/**
 * Seed the in-memory list of known file names from the image and video directories.
 *
 * The list is consulted by checkFileHasDownload() before anything is fetched again.
 * NOTE(review): scanFilesWithoutPath() is defined outside this file; presumably it returns
 * bare file names (no directory part) — confirm.
 */
public function __construct() {
    $this->files = array_merge(
        $this->files,
        scanFilesWithoutPath($this->imageDir),
        scanFilesWithoutPath($this->videoDir),
    );
}
|
|
|
|
|
|
|
/**
 * Accounts to scrape, as a map of display name => Weibo container ID.
 *
 * Entries that are commented out are disabled accounts kept for reference.
 * NOTE(review): "EstheticBody" and "Favoriter1" map to the same container ID
 * (1076036577888143), so that container is visited twice per pass — confirm whether
 * one of them is a leftover alias.
 *
 * @return array<string, string>
 */
public function weiboContainerIdList(): array {
    return [
        "tu是真的tu" => "1076032267562540",
        "自己" => "230259",
        // "轮子哥" => "2304131916825084",
        "徐圣佑- 新号" => "1076035893812490",
        "Afreecatv 精选" => "1076037383142802",
        "AF-录制" => "1076037627445357",
        "录像专用小马甲" => "1076033785550287",
        "录像专用小马甲1" => "1076037453003153",
        "录像专用小马甲2" => "1076036637436995",
        "短短" => "1076032137020951",
        // "物理" => "2304133907143723",
        // "亲亲奥利给 af" => "1005056070814923",
        "FanCam_" => "1076032740158183",
        "ParmyAU" => "1076032482381165",
        "热舞托管站" => "1076037422251891",
        "af尼美舞蹈" => "1076035639898367",
        "傲娇的猫猫猫" => "1076033118852135",
        "绅士甄选" => "1076037644465107",
        "Win话事人" => "1076032439815352",
        "粤上秋璃子" => "1076037747006685",
        "腐团儿" => "1076032304291523",
        "聂小雨" => "1076032579189065",
        "小何同学" => "1076032104580380",
        "只有一个9521" => "1076037504338957",
        "娜扎工作室" => "2304136001863056",
        "刘教師特别授業" => "1076037686776705",
        "Nana姐姐的微博" => "1076037764879728",
        "小蓝蓝 plus" => "1076037409360499",
        "露露" => "1076037731119705",
        "小仓" => "1076036880826706",
        "EstheticBody" => "1076036577888143",
        "kyokyo" => "1076032591431197",
        "softygirls" => "1076035040459465",
        "是你的程儿" => "1076037607376925",
        "Kokuhui" => "1076037687861062",
        "川" => "1076037648703289",
        // "小红帽舞托" => "2304137160189611",
        "李一桐工作室" => "1076035973698579",
        "李一桐" => "1076035372556014",
        "Yukicai-" => "1076031035321575",
        "知一呐" => "1076037458616069",
        "潇骑校尉曹操" => "1076037040041221",
        "yesyanbaby" => "1076035279748872",
        "EmotionalModels" => "1076035832381343",
        "Favoriter1" => "1076036577888143",
        "_hh0neyy_" => "1076033887046428",
        "阿薰kaOri" => "1076036635655873",
        "zatoichii" => "1076036488464342",
        "肥胖小叶" => "1076036865284939",
        "装甲D白兔" => "1076035557057192",
        "阁楼上的猫吖" => "1076033028811321",
        "贺稀饭吃馍" => "1076037557219457",
        "青丘图鉴" => "1076036499814427",
        "Moonquakesjm" => "1076032354737383",
        "柒柒还活着4" => "1076037837187616",
        "rita" => "1076032749401781",
        "行嫂" => "1076037916030814",
        "采花小强" => "1076033714280302",
        "小宁" => "1076036052688342",
        // "娜扎工作室" => "2304136001863056_-_WEIBO_SECOND_PROFILE_WEIBO",
    ];
}
|
|
|
|
|
|
|
/**
 * Promote yesterday's cached count of every listed container to its "last" baseline.
 *
 * For each container ID this reads `weibo:count:{yesterday}:{id}`. When a value is cached it is
 * echoed and logged, copied to `weibo:count:last:{id}`, and the dated key is deleted.
 */
public function moveLastDayCount() {
    $lastDay = date("Y-m-d", strtotime("-1 days"));
    $redis = Redis::connection();

    foreach ($this->weiboContainerIdList() as $containerId) {
        $dailyKey = "weibo:count:{$lastDay}:{$containerId}";
        $value = $redis->get($dailyKey);

        // Loose comparison on purpose: depending on the Redis client a missing key comes back
        // as null or false, and both must be treated as "nothing cached".
        if ($value == null) {
            continue;
        }

        echo "last day cache is $value \n";
        Log::info("{$containerId} last day cache is $value");
        $redis->set("weibo:count:last:" . $containerId, $value);
        $redis->del($dailyKey);
    }
}
|
|
|
|
|
|
|
/**
 * Parse one page of a Weibo container feed and download the pictures and videos it references.
 *
 * @param string $content Raw JSON body of the feed response.
 * @return array|bool `["skipContainerId" => id]` when the container total is less than the stored
 *                    baseline plus 10, `["emptyFlag" => true]` when the response message says the
 *                    feed has no content, otherwise `true` (also after a logged failure).
 */
public function scrapeWeiboPicAndVideo($content) {
    try {
        if (strlen((string) $content) === 0) {
            die('empty');
        }

        $decoded_json = json_decode($content, true);
        if (!is_array($decoded_json)) {
            // Invalid JSON used to reach array_key_exists() with null; report it instead.
            Log::error("weibo feed response is not valid JSON: " . json_last_error_msg());
            return true;
        }

        $msg = $decoded_json["msg"] ?? null;
        if ($msg !== null) {
            Log::info("======== msg ===============" . $msg);
        }

        $data = is_array($decoded_json["data"] ?? null) ? $decoded_json["data"] : [];
        $cards = is_array($data["cards"] ?? null) ? $data["cards"] : [];
        $cardListInfo = $data["cardlistInfo"] ?? null;

        if (is_array($cardListInfo) && count($cardListInfo) > 0) {
            $nowCount = Arr::get($cardListInfo, "total", 0);
            $containerId = Arr::get($cardListInfo, "containerid", "");
            Log::info("current has content");

            $redis = Redis::connection();
            $today = date("Y-m-d");
            // SETNX only writes when the key is absent, so no separate GET is needed.
            $redis->setnx("weibo:count:{$today}:{$containerId}", $nowCount);

            // A missing baseline (null/false from the client) counts as 0.
            $lastCount = (int) $redis->get("weibo:count:last:" . $containerId);
            if ($nowCount < $lastCount + 10) {
                Log::info("now count is {$nowCount} and lastCount is {$lastCount} skip");
                return ["skipContainerId" => $containerId];
            }
        } else if ($msg == "这里还没有内容") {
            Log::info("current has no content");
            return ["emptyFlag" => true];
        }

        foreach ($cards as $card) {
            if (!is_array($card) || !array_key_exists("mblog", $card)) {
                continue;
            }
            $mblog = $card['mblog'];

            if (array_key_exists("pics", $mblog)) {
                Log::info("-------enter 1");
                $this->process_pic(
                    $mblog['pics'],
                    $this->imageDir,
                    Arr::get($mblog, "user", []),
                    Arr::get($mblog, "text", '')
                );
            } else if (array_key_exists("retweeted_status", $mblog)) {
                Log::info("-------enter 2");
                $retweeted = $mblog["retweeted_status"];
                if (array_key_exists("pics", $retweeted)) {
                    $this->process_pic(
                        $retweeted['pics'],
                        $this->imageDir,
                        Arr::get($retweeted, "user", []),
                        Arr::get($retweeted, "text", '')
                    );
                }
            } else if (array_key_exists("page_info", $mblog) && array_key_exists("media_info", $mblog["page_info"])) {
                Log::info("-------enter 3");
                $page_info = $mblog["page_info"];
                // U+FF1F (fullwidth question mark) is stripped from the title used as file name.
                $video_title = str_replace("\u{FF1F}", "", (string) Arr::get($page_info, "content2", ""));
                $medis_urls = $page_info["urls"] ?? [];

                // Prefer 720p, then fall back to the other renditions in order.
                $video_url = Arr::get($medis_urls, "mp4_720p_mp4", "");
                Log::info("video_urls: " . $video_url);
                foreach (["mp4_hd_url", "mp4_ld_mp4"] as $fallbackKey) {
                    if ($video_url != "") {
                        break;
                    }
                    $video_url = Arr::get($medis_urls, $fallbackKey, "");
                }

                if ($video_url != "") {
                    $this->process_video($video_url, $this->videoDir, $video_title);
                }
            }
        }
    } catch (\Throwable $e) {
        // Throwable rather than Exception so TypeError and similar failures on unexpected
        // payload shapes are logged instead of terminating the run.
        Log::error($e);
        Log::error($e->getTraceAsString());
    }

    return true;
}
|
|
|
|
|
|
|
/**
 * Parse one page of a Weibo group timeline and download the pictures and videos it references.
 *
 * Unlike scrapeWeiboPicAndVideo(), the posts are read directly from `data.statuses`.
 * NOTE(review): the echo/var_dump calls look like debug output; they are kept because callers
 * may rely on seeing them.
 *
 * @param string $content Raw JSON body of the timeline response.
 * @return bool Always `true`; failures are logged.
 */
public function scrapeGroupWeiboPicAndVideo($content) {
    $video_dir = "/Volumes/Crucial X6/Image/weibo/video/";
    $image_dir = "/Volumes/Crucial X6/Image/weibo/image/";

    try {
        if (strlen((string) $content) === 0) {
            die('empty');
        }

        $decoded_json = json_decode($content, true);
        $cards = is_array($decoded_json) ? ($decoded_json['data']['statuses'] ?? null) : null;
        if (!is_array($cards)) {
            Log::error("weibo group response has no data.statuses list: " . json_last_error_msg());
            return true;
        }

        foreach ($cards as $card) {
            $mblog = $card;

            $subdir = "data/video";
            if (!file_exists($subdir)) {
                // Recursive, so a missing "data" parent no longer makes mkdir fail.
                mkdir($subdir, 0777, true);
            }
            echo json_encode($mblog);

            if (array_key_exists("pics", $mblog)) {
                $pics = $mblog['pics'];
                $user = Arr::get($mblog, "user", []);
                $text = Arr::get($mblog, "text", '');
                var_dump($pics);
                $this->process_pic($pics, $image_dir, $user, $text);
            } else if (array_key_exists("retweeted_status", $mblog)) {
                $retweeted = $mblog["retweeted_status"];
                if (array_key_exists("pics", $retweeted)) {
                    $this->process_pic(
                        $retweeted['pics'],
                        $image_dir,
                        Arr::get($retweeted, "user", []),
                        Arr::get($retweeted, "text", '')
                    );
                }
            } else if (array_key_exists("page_info", $mblog) && array_key_exists("media_info", $mblog["page_info"])) {
                $page_info = $mblog["page_info"];
                $media_info = $page_info["media_info"];

                // First non-empty rendition wins, best quality first.
                $video_url = "";
                foreach (["mp4_720p_mp4", "mp4_hd_url", "mp4_sd_url"] as $qualityKey) {
                    $video_url = Arr::get($media_info, $qualityKey, "");
                    if ($video_url != "") {
                        break;
                    }
                }

                if ($video_url != "") {
                    $this->process_video($video_url, $video_dir, Arr::get($page_info, "content2", ""));
                }
            }
        }
    } catch (\Throwable $e) {
        // Throwable rather than Exception so type errors on odd payloads are logged, not fatal.
        Log::error($e);
        Log::error($e->getTraceAsString());
    }

    return true;
}
|
|
|
|
|
|
|
/**
 * Current Unix timestamp in seconds, including the microsecond fraction.
 *
 * @return float
 */
function microtime_float() {
    // microtime(true) returns the float directly; no need to split the "usec sec" string.
    return microtime(true);
}
|
|
|
|
|
|
|
/**
 * Download the pictures of one post into $subDir, skipping any that are already stored.
 *
 * @param array  $pics   Picture entries. Each carries 'url' (and optionally 'geo' with
 *                       'height'/'width'), either directly or under a 'large' sub-entry.
 * @param string $subDir Target directory; created recursively when missing.
 * @param array  $user   Post author; a non-empty 'screen_name' is prefixed to the file name.
 * @param string $text   Post text. Not used here; kept so existing callers keep working.
 */
function process_pic($pics, $subDir, $user, $text) {
    if (!file_exists($subDir)) {
        mkdir($subDir, 0777, true);
    }

    $user_name = Arr::get($user, "screen_name", '');

    foreach ($pics as $pic) {
        // Prefer the 'large' rendition when the entry has one.
        $source = array_key_exists("large", $pic) ? $pic["large"] : $pic;
        $pic_url = $source['url'];

        // Height/width ratio, reset for every picture so a previous picture's ratio cannot
        // cause this one to be skipped. Stays 0 when no usable geometry is present.
        $h2w = 0;
        $geo = $source["geo"] ?? null;
        if (is_array($geo)) {
            $width = (float) ($geo['width'] ?? 0);
            if ($width > 0) {
                $h2w = (float) ($geo['height'] ?? 0) / $width;
            }
        }
        // Pictures more than 15 times taller than wide are not downloaded.
        if ($h2w > 15) {
            continue;
        }

        $picName = pathinfo($pic_url, PATHINFO_FILENAME);
        $picExt = pathinfo($pic_url, PATHINFO_EXTENSION);
        if ($user_name != '') {
            $picName = $user_name . '--' . $picName;
        }
        $baseName = $picName . "." . $picExt;
        $file_name = $subDir . DIRECTORY_SEPARATOR . $baseName;

        if (str_contains($baseName, "KID=imgbed,photo&") || file_exists($file_name) || $this->checkFileHasDownload($baseName)) {
            Log::info("$baseName file exists or contains KID");
            continue;
        }

        try {
            $pic_content = $this->downloadImg($pic_url);
            if (file_put_contents($file_name, $pic_content) === false) {
                // Do not index a file that was never written, so a later run retries it.
                Log::error("failed to write $file_name");
                continue;
            }
            $this->files[] = $baseName;
        } catch (\Throwable $e) {
            // Best effort: one failed picture must not stop the rest of the post.
            Log::error($e->getMessage());
        }
    }
}
|
|
|
|
|
|
|
/**
 * Fetch an image over HTTP with browser-like request headers.
 *
 * @param string $url Absolute image URL.
 * @return mixed The response body as returned by the HTTP client's getBody().
 * @throws \InvalidArgumentException When no host can be parsed out of $url.
 */
public function downloadImg($url) {
    $host = parse_url($url, PHP_URL_HOST);
    if (!is_string($host) || $host === '') {
        throw new \InvalidArgumentException("Cannot determine host of image URL: {$url}");
    }

    // Headers copied from a desktop Chrome image request; the referer is the mobile Weibo site.
    $headers = [
        'authority' => $host,
        'accept' => 'image/avif,image/webp,image/apng,image/svg+xml,image/*,*/*;q=0.8',
        'accept-language' => 'zh-CN,zh;q=0.9',
        'cache-control' => 'no-cache',
        'pragma' => 'no-cache',
        'referer' => 'https://m.weibo.cn/',
        'sec-ch-ua' => '"Google Chrome";v="111", "Not(A:Brand";v="8", "Chromium";v="111"',
        'sec-ch-ua-mobile' => '?0',
        'sec-ch-ua-platform' => '"macOS"',
        'sec-fetch-dest' => 'image',
        'sec-fetch-mode' => 'no-cors',
        'sec-fetch-site' => 'cross-site',
        'user-agent' => 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36',
    ];

    $client = new Client();
    $res = $client->sendAsync(new Request('GET', $url, $headers))->wait();

    return $res->getBody();
}
|
|
|
|
|
|
|
/**
 * Download one video into $subdir unless it is already stored or its signed URL has expired.
 *
 * The file is saved as "{title}--{name derived from the URL path}", where any URL embedded in
 * the title and all slashes are removed first.
 *
 * @param string $video_url  Signed video URL; its `Expires` query parameter is compared to now.
 * @param string $subdir     Target directory; created recursively when missing.
 * @param string $video_name Post title used as the file name prefix.
 */
function process_video($video_url, $subdir, $video_name) {
    Log::info("video_url: " . $video_url . " and video name is : " . $video_name);

    if (!file_exists($subdir)) {
        mkdir($subdir, 0777, true);
    }

    $url_params = parse_url($video_url) ?: [];
    parse_str($url_params["query"] ?? "", $params);

    // Path without its leading slash; "stream/" becomes "--" and remaining slashes are dropped.
    $video_origin_name = substr($url_params["path"] ?? "", 1);
    $video_origin_name = str_replace("stream/", "--", $video_origin_name);
    $video_origin_name = str_replace("/", "", $video_origin_name);

    $video_name = preg_replace("/(http|https|ftp)(.)*([a-z0-9\-\.\_])+/i", "", (string) $video_name);
    $video_name = str_replace("/", "", (string) $video_name);

    // time() is the real current timestamp. The previous strtotime(date("y-m-d h:i:s")) used a
    // 12-hour clock without am/pm, so afternoon times came out 12 hours early.
    $now = time();
    // A URL without an Expires parameter is treated as expired, as before.
    $expires = $params['Expires'] ?? 0;
    if ($now > (int) $expires) {
        Log::error("视频有效期已过,now is " . $now . ", Expires is " . $expires);
        abort(404);
    }

    $baseName = $video_name . "--" . $video_origin_name;
    $file_name = $subdir . "/" . $baseName;

    if (file_exists($file_name) || $this->checkFileHasDownload($baseName)) {
        Log::info("$baseName file exists");
        return;
    }

    $video_content = file_get_contents($video_url);
    if ($video_content === false) {
        // Writing here would leave an empty file that blocks every later retry.
        Log::error("failed to download video: " . $video_url);
        return;
    }

    file_put_contents($file_name, $video_content);
    $this->files[] = $baseName;
}

/**
 * Tell whether a file name is already known, either in the in-memory list or in ImageRecord.
 *
 * @param string $fileName Bare file name (no directory).
 * @return bool True when the file was seen before, false otherwise.
 */
private function checkFileHasDownload($fileName): bool {
    // Strict match, so numeric-looking names such as "0123" and "123" are not considered equal.
    // NOTE(review): assumes $this->files holds strings — confirm against scanFilesWithoutPath().
    if (in_array($fileName, $this->files, true)) {
        Log::info("$fileName exist in local files");
        return true;
    }

    if (ImageRecord::where("name", $fileName)->first() != null) {
        Log::info("$fileName exist in db");
        return true;
    }

    // Explicit negative result; the method used to fall through without returning a value.
    return false;
}

/**
 * Placeholder for comment scraping. It has no body yet and returns nothing.
 *
 * @param string $content Unused for now.
 */
public function scrapeWeiboComments($content) {
}
|
|
|
|
|
|
|
} |