<?php
|
|
|
|
namespace App\Services;
|
|
use App\ImageRecord;
|
|
use GuzzleHttp\Client;
|
|
use GuzzleHttp\Psr7\Request;
|
|
use Illuminate\Support\Arr;
|
|
use Illuminate\Support\Facades\Log;
|
|
use Illuminate\Support\Facades\Redis;
|
|
use Throwable;
|
|
|
|
// Process-wide runtime configuration, applied as soon as this file is loaded.
// NOTE(review): these are file-level side effects in a file that also declares
// a class; they affect the whole PHP process, not just WeiboService.

// Remove the script execution time limit (0 = no limit).
set_time_limit(0);
|
|
// Remove the PHP memory ceiling ('-1' = unlimited).
ini_set('memory_limit', '-1');
|
|
// Make date()/strtotime() in this process operate in UTC.
date_default_timezone_set('UTC');
|
|
|
|
class WeiboService {
|
|
/**
 * Names of files that are already present locally. Seeded by the constructor
 * and appended to after every successful download; consulted by
 * checkFileHasDownload().
 *
 * @var array
 */
private $files = [];

/** Directory scanned at construction time for existing videos. */
private $videoDir = "/Volumes/T7/Image/weibo/video/";

/** Directory scanned at construction time for existing images. */
private $imageDir = "/Volumes/T7/Image/weibo/image/";

/**
 * Seed the local file index from the image directory, then the video directory.
 *
 * scanFilesWithoutPath() is a helper defined outside this file; presumably it
 * returns bare file names (no directory part) — verify against its definition.
 */
public function __construct() {
    foreach ([$this->imageDir, $this->videoDir] as $directory) {
        $this->files = array_merge($this->files, scanFilesWithoutPath($directory));
    }
}
|
|
|
|
/**
 * Map of display name => Weibo container id for every account being tracked.
 *
 * Entries that are commented out are disabled accounts kept for reference.
 * NOTE(review): "EstheticBody" and "Favoriter1" carry the same container id.
 *
 * @return array
 */
public function weiboContainerIdList(): array {
    $containerIds = [
        'tu是真的tu' => '1076032267562540',
        '自己' => '230259',
        // '轮子哥' => '2304131916825084',
        '徐圣佑- 新号' => '1076035893812490',
        'Afreecatv 精选' => '1076037383142802',
        'AF-录制' => '1076037627445357',
        '录像专用小马甲' => '1076033785550287',
        '录像专用小马甲1' => '1076037453003153',
        '录像专用小马甲2' => '1076036637436995',
        '短短' => '1076032137020951',
        // '物理' => '2304133907143723',
        // '亲亲奥利给 af' => '1005056070814923',
        'FanCam_' => '1076032740158183',
        'ParmyAU' => '1076032482381165',
        '热舞托管站' => '1076037422251891',
        'af尼美舞蹈' => '1076035639898367',
        '傲娇的猫猫猫' => '1076033118852135',
        '绅士甄选' => '1076037644465107',
        'Win话事人' => '1076032439815352',
        '粤上秋璃子' => '1076037747006685',
        '腐团儿' => '1076032304291523',
        '聂小雨' => '1076032579189065',
        '小何同学' => '1076032104580380',
        '只有一个9521' => '1076037504338957',
        '娜扎工作室' => '2304136001863056',
        '刘教師特别授業' => '1076037686776705',
        'Nana姐姐的微博' => '1076037764879728',
        '小蓝蓝 plus' => '1076037409360499',
        '露露' => '1076037731119705',
        '小仓' => '1076036880826706',
        'EstheticBody' => '1076036577888143',
        'kyokyo' => '1076032591431197',
        'softygirls' => '1076035040459465',
        '是你的程儿' => '1076037607376925',
        'Kokuhui' => '1076037687861062',
        '川' => '1076037648703289',
        // '小红帽舞托' => '2304137160189611',
        '李一桐工作室' => '1076035973698579',
        '李一桐' => '1076035372556014',
        'Yukicai-' => '1076031035321575',
        '知一呐' => '1076037458616069',
        '潇骑校尉曹操' => '1076037040041221',
        'yesyanbaby' => '1076035279748872',
        'EmotionalModels' => '1076035832381343',
        'Favoriter1' => '1076036577888143',
        '_hh0neyy_' => '1076033887046428',
        '阿薰kaOri' => '1076036635655873',
        'zatoichii' => '1076036488464342',
        '肥胖小叶' => '1076036865284939',
        '装甲D白兔' => '1076035557057192',
        '阁楼上的猫吖' => '1076033028811321',
        '贺稀饭吃馍' => '1076037557219457',
        '青丘图鉴' => '1076036499814427',
        'Moonquakesjm' => '1076032354737383',
        '柒柒还活着4' => '1076037837187616',
        'rita' => '1076032749401781',
        '行嫂' => '1076037916030814',
        '采花小强' => '1076033714280302',
        '小宁' => '1076036052688342',
        // '娜扎工作室' => '2304136001863056_-_WEIBO_SECOND_PROFILE_WEIBO',
    ];

    return $containerIds;
}
|
|
|
|
/**
 * Roll yesterday's per-container counters over into the "last" baseline.
 *
 * For every tracked container id, the value stored under
 * "weibo:count:<yesterday>:<containerId>" (if any) is echoed, logged, copied to
 * "weibo:count:last:<containerId>", and the dated key is then deleted.
 *
 * @return void
 */
public function moveLastDayCount() {
    $yesterday = date("Y-m-d", strtotime("-1 days"));

    foreach ($this->weiboContainerIdList() as $containerId) {
        $datedKey = "weibo:count:{$yesterday}:{$containerId}";
        $cached = Redis::connection()->get($datedKey);

        // Nothing recorded for this container yesterday.
        if ($cached == null) {
            continue;
        }

        echo "last day cache is $cached \n";
        Log::info("{$containerId} last day cache is $cached");

        Redis::connection()->set("weibo:count:last:" . $containerId, $cached);
        Redis::connection()->del($datedKey);
    }
}
|
|
|
|
/**
 * Parse one Weibo container ("cards") API response and download the pictures
 * and videos referenced by its posts.
 *
 * Flow:
 *  1. Decode $content as JSON; a payload that does not decode to an array is
 *     logged and ignored.
 *  2. When data.cardlistInfo is present and non-empty, record today's first
 *     seen total in Redis (setnx) and compare the current total with the
 *     "weibo:count:last:<containerid>" baseline; fewer than 10 new posts means
 *     the container is skipped.
 *  3. Otherwise, when msg equals "这里还没有内容", report the container as empty.
 *  4. For each card with an "mblog": download its pics, else the pics of its
 *     retweeted_status, else the video described by page_info.
 *
 * Any Throwable raised while processing is logged and swallowed, so one bad
 * payload does not break the caller.
 *
 * @param string $content Raw JSON response body.
 * @return array|bool ["skipContainerId" => id] when the container has too few
 *                    new posts, ["emptyFlag" => true] when it has no content,
 *                    true in every other case (including logged failures).
 */
public function scrapeWeiboPicAndVideo($content) {
    $video_dir = "/Volumes/T7/Image/weibo/video/";
    $image_dir = "/Volumes/T7/Image/weibo/image/";

    try {
        if (strlen($content) === 0) {
            // NOTE(review): terminating the process from a service is harsh;
            // kept unchanged because callers may rely on it.
            die('empty');
        }

        $decoded_json = json_decode($content, true);
        if (!is_array($decoded_json)) {
            // json_decode() yields null on malformed input; bail out instead of
            // letting array functions raise a TypeError further down.
            Log::error("weibo response is not a JSON object: " . json_last_error_msg());
            return true;
        }

        $msg = $decoded_json["msg"] ?? null;
        if (array_key_exists("msg", $decoded_json)) {
            Log::info("======== msg ===============" . $msg);
        }

        $data = is_array($decoded_json["data"] ?? null) ? $decoded_json["data"] : [];
        $cards = is_array($data["cards"] ?? null) ? $data["cards"] : [];
        $cardListInfo = $data["cardlistInfo"] ?? null;

        if (is_array($cardListInfo) && count($cardListInfo) > 0) {
            $nowCount = Arr::get($cardListInfo, "total", 0);
            $containerId = Arr::get($cardListInfo, "containerid", "");
            Log::info("current has content");

            // Remember the first total seen today for this container.
            $today = date("Y-m-d");
            $todayKey = "weibo:count:{$today}:{$containerId}";
            if (Redis::connection()->get($todayKey) == null) {
                Redis::connection()->setnx($todayKey, $nowCount);
            }

            $lastCount = Redis::connection()->get("weibo:count:last:" . $containerId);
            if ($lastCount == null) {
                $lastCount = 0;
            }

            // Fewer than 10 new posts since the baseline: not worth scraping.
            if ($nowCount < $lastCount + 10) {
                Log::info("now count is {$nowCount} and lastCount is {$lastCount} skip");
                return ["skipContainerId" => $containerId];
            }
        } elseif ($msg === "这里还没有内容") {
            Log::info("current has no content");
            return ["emptyFlag" => true];
        }

        foreach ($cards as $card) {
            if (!is_array($card) || !array_key_exists("mblog", $card) || !is_array($card["mblog"])) {
                continue;
            }
            $mblog = $card["mblog"];

            if (array_key_exists("pics", $mblog)) {
                Log::info("-------enter 1");
                $user = Arr::get($mblog, "user", []);
                $text = Arr::get($mblog, "text", '');
                $this->process_pic($mblog["pics"], $image_dir, $user, $text);
            } elseif (array_key_exists("retweeted_status", $mblog)) {
                Log::info("-------enter 2");
                $retweeted = $mblog["retweeted_status"];
                if (is_array($retweeted) && array_key_exists("pics", $retweeted)) {
                    $user = Arr::get($retweeted, "user", []);
                    $text = Arr::get($retweeted, "text", '');
                    $this->process_pic($retweeted["pics"], $image_dir, $user, $text);
                }
            } elseif (
                array_key_exists("page_info", $mblog)
                && is_array($mblog["page_info"])
                && array_key_exists("media_info", $mblog["page_info"])
            ) {
                Log::info("-------enter 3");

                // Strip the full-width question mark (U+FF1F) from the top-level
                // string fields; "content2" is later used as part of a file name.
                $page_info = str_replace("\u{FF1F}", "", $mblog["page_info"]);
                $medis_urls = $mblog["page_info"]["urls"] ?? [];

                // Prefer 720p, then fall back to the HD and LD variants.
                $video_url = Arr::get($medis_urls, "mp4_720p_mp4", "");
                Log::info("video_urls: " . $video_url);
                foreach (["mp4_hd_url", "mp4_ld_mp4"] as $fallbackKey) {
                    if ($video_url != "") {
                        break;
                    }
                    $video_url = Arr::get($medis_urls, $fallbackKey, "");
                }

                if ($video_url != "") {
                    $this->process_video($video_url, $video_dir, Arr::get($page_info, "content2", ""));
                }
            }
        }
    } catch (Throwable $e) {
        // Throwable (not just Exception) so TypeError and friends are logged
        // here as well instead of escaping to the caller.
        Log::error($e);
        Log::error($e->getTraceAsString());
    }

    return true;
}
|
|
|
|
/**
 * Parse a Weibo group-timeline response (posts under data.statuses) and
 * download the pictures and videos referenced by its posts.
 *
 * Each status is handled like an "mblog": download its pics, else the pics of
 * its retweeted_status, else the video whose URL is listed in
 * page_info.media_info (720p, then HD, then SD).
 *
 * Debug output (echo/var_dump) and creation of an unused relative
 * "data/video" directory were removed, matching scrapeWeiboPicAndVideo() where
 * the same scaffolding is already disabled.
 *
 * Any Throwable raised while processing is logged and swallowed.
 *
 * @param string $content Raw JSON response body.
 * @return bool Always true.
 */
public function scrapeGroupWeiboPicAndVideo($content) {
    $video_dir = "/Volumes/Crucial X6/Image/weibo/video/";
    $image_dir = "/Volumes/Crucial X6/Image/weibo/image/";

    try {
        if (strlen($content) === 0) {
            // NOTE(review): terminating the process from a service is harsh;
            // kept unchanged because callers may rely on it.
            die('empty');
        }

        $decoded_json = json_decode($content, true);
        if (!is_array($decoded_json)) {
            // json_decode() yields null on malformed input.
            Log::error("weibo group response is not a JSON object: " . json_last_error_msg());
            return true;
        }

        $statuses = $decoded_json['data']['statuses'] ?? [];
        if (!is_array($statuses)) {
            $statuses = [];
        }

        foreach ($statuses as $mblog) {
            if (!is_array($mblog)) {
                continue;
            }

            if (array_key_exists("pics", $mblog)) {
                $user = Arr::get($mblog, "user", []);
                $text = Arr::get($mblog, "text", '');
                $this->process_pic($mblog['pics'], $image_dir, $user, $text);
            } elseif (array_key_exists("retweeted_status", $mblog)) {
                $retweeted = $mblog["retweeted_status"];
                if (is_array($retweeted) && array_key_exists("pics", $retweeted)) {
                    $user = Arr::get($retweeted, "user", []);
                    $text = Arr::get($retweeted, "text", '');
                    $this->process_pic($retweeted['pics'], $image_dir, $user, $text);
                }
            } elseif (
                array_key_exists("page_info", $mblog)
                && is_array($mblog["page_info"])
                && array_key_exists("media_info", $mblog["page_info"])
            ) {
                $page_info = $mblog["page_info"];
                $media_info = $page_info["media_info"];

                // Prefer 720p, then fall back to the HD and SD variants.
                $video_url = "";
                foreach (["mp4_720p_mp4", "mp4_hd_url", "mp4_sd_url"] as $qualityKey) {
                    $video_url = Arr::get($media_info, $qualityKey, "");
                    if ($video_url != "") {
                        break;
                    }
                }

                if ($video_url != "") {
                    $this->process_video($video_url, $video_dir, Arr::get($page_info, "content2", ""));
                }
            }
        }
    } catch (Throwable $e) {
        // Throwable (not just Exception) so TypeError and friends are logged
        // here as well instead of escaping to the caller.
        Log::error($e);
        Log::error($e->getTraceAsString());
    }

    return true;
}
|
|
|
|
/**
 * Current Unix time as a float with sub-second precision.
 *
 * microtime() without arguments returns the string "<fraction> <seconds>";
 * the two parts are converted to floats and summed.
 *
 * @return float
 */
public function microtime_float() {
    $parts = explode(" ", microtime());
    $fraction = (float) $parts[0];
    $seconds = (float) $parts[1];

    return $fraction + $seconds;
}
|
|
|
|
/**
 * Download every picture of a post into $subDir unless it already exists.
 *
 * For each entry the "large" variant is preferred over the entry itself as the
 * source of the URL and geometry. Pictures whose height/width ratio exceeds 15
 * are skipped. The file is named "<screen_name>--<url filename>.<ext>" (the
 * prefix is omitted when the user has no screen_name). A picture is skipped
 * when its name contains "KID=imgbed,photo&", when the target file exists, or
 * when checkFileHasDownload() reports it as already fetched. Download errors
 * are logged and do not stop the remaining pictures.
 *
 * @param array  $pics   Picture entries of a post.
 * @param string $subDir Destination directory (created when missing).
 * @param array  $user   Post author; only "screen_name" is read.
 * @param string $text   Post text; accepted for interface compatibility, unused.
 * @return void
 */
function process_pic($pics, $subDir, $user, $text) {
    if (!file_exists($subDir)) {
        mkdir($subDir, 0777, true);
    }

    $user_name = Arr::get($user, "screen_name", '');

    foreach ($pics as $pic) {
        if (!is_array($pic)) {
            continue;
        }

        $source = array_key_exists("large", $pic) ? $pic['large'] : $pic;
        $pic_url = is_array($source) ? ($source['url'] ?? '') : '';
        if (!is_string($pic_url) || $pic_url === '') {
            Log::info("pic entry has no url, skipped");
            continue;
        }

        // The ratio is recomputed for every picture: an entry without usable
        // geometry must not inherit the ratio of the previous picture.
        $h2w = 0;
        $geo = $source['geo'] ?? null;
        if (
            is_array($geo)
            && is_numeric($geo['height'] ?? null)
            && is_numeric($geo['width'] ?? null)
            && $geo['width'] != 0
        ) {
            $h2w = $geo['height'] / $geo['width'];
        }

        // Skip extremely tall images.
        if ($h2w > 15) {
            continue;
        }

        $picName = pathinfo($pic_url, PATHINFO_FILENAME);
        $picExt = pathinfo($pic_url, PATHINFO_EXTENSION);
        if ($user_name != '') {
            $picName = $user_name . '--' . $picName;
        }

        $baseName = $picName . "." . $picExt;
        $file_name = $subDir . DIRECTORY_SEPARATOR . $baseName;

        if (
            str_contains($baseName, "KID=imgbed,photo&")
            || file_exists($file_name)
            || $this->checkFileHasDownload($baseName)
        ) {
            Log::info("$baseName file exists or contains KID");
            continue;
        }

        try {
            $pic_content = $this->downloadImg($pic_url);
            file_put_contents($file_name, $pic_content);
            $this->files[] = $baseName;
        } catch (Throwable $e) {
            Log::error($e->getMessage());
        }
    }
}
|
|
|
|
/**
 * Fetch an image with browser-like request headers and return its body.
 *
 * The request is issued through Guzzle (sendAsync + wait, i.e. the call blocks
 * until the response arrives). The "authority" header is set to the host part
 * of $url and the referer to https://m.weibo.cn/. Nothing is caught here, so
 * failures raised by Guzzle propagate to the caller.
 *
 * @param string $url Image URL.
 * @return mixed The value of getBody() on the response.
 */
public function downloadImg($url) {
    $host = parse_url($url)["host"];

    $request = new Request('GET', $url, [
        'authority' => $host,
        'accept' => 'image/avif,image/webp,image/apng,image/svg+xml,image/*,*/*;q=0.8',
        'accept-language' => 'zh-CN,zh;q=0.9',
        'cache-control' => 'no-cache',
        'pragma' => 'no-cache',
        'referer' => 'https://m.weibo.cn/',
        'sec-ch-ua' => '"Google Chrome";v="111", "Not(A:Brand";v="8", "Chromium";v="111"',
        'sec-ch-ua-mobile' => '?0',
        'sec-ch-ua-platform' => '"macOS"',
        'sec-fetch-dest' => 'image',
        'sec-fetch-mode' => 'no-cors',
        'sec-fetch-site' => 'cross-site',
        'user-agent' => 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36',
    ]);

    $httpClient = new Client();
    $response = $httpClient->sendAsync($request)->wait();

    return $response->getBody();
}
|
|
|
|
/**
 * Download a video into $subdir unless it has already been fetched.
 *
 * The target file name is "<cleaned title>--<flattened URL path>": URLs and
 * "/" are stripped from $video_name, and in the URL path the leading "/" is
 * dropped, "stream/" becomes "--" and remaining "/" are removed.
 *
 * When the URL carries an "Expires" query parameter that lies in the past, the
 * error is logged and abort(404) is called.
 *
 * @param string $video_url  Video URL (optionally signed with "Expires").
 * @param string $subdir     Destination directory (created when missing).
 * @param string $video_name Post text used as the file name prefix.
 * @return void
 */
function process_video($video_url, $subdir, $video_name) {
    Log::info("video_url: " . $video_url . " and video name is : " . $video_name);

    if (!file_exists($subdir)) {
        mkdir($subdir, 0777, true);
    }

    $url_params = parse_url($video_url);
    if (!is_array($url_params)) {
        // parse_url() returns false for seriously malformed URLs.
        Log::error("unparsable video url: " . $video_url);
        return;
    }

    $params = [];
    parse_str($url_params["query"] ?? "", $params);

    // "/a/stream/b.mp4" -> "a--b.mp4"
    $video_origin_name = (string) substr($url_params["path"] ?? "", 1);
    $video_origin_name = str_replace("stream/", "--", $video_origin_name);
    $video_origin_name = str_replace("/", "", $video_origin_name);

    $video_name = preg_replace("/(http|https|ftp)(.)*([a-z0-9\-\.\_])+/i", "", (string) $video_name);
    $video_name = str_replace("/", "", (string) $video_name);

    // time() is the current Unix timestamp. The former
    // strtotime(date("y-m-d h:i:s")) round-trip used a 12-hour clock without
    // am/pm, which shifted "now" by 12 hours for part of the day.
    $now = time();
    if (isset($params['Expires']) && $now > (int) $params['Expires']) {
        Log::error("视频有效期已过,now is " . $now . ", Expires is " . $params['Expires']);
        abort(404);
    }

    $baseName = $video_name . "--" . $video_origin_name;
    $file_name = $subdir . "/" . $baseName;

    if (file_exists($file_name) || $this->checkFileHasDownload($baseName)) {
        Log::info("$baseName file exists");
        return;
    }

    $video_content = file_get_contents($video_url);
    if ($video_content === false) {
        // Do not leave an empty file behind: it would make the video look
        // "already downloaded" on every later run.
        Log::error("failed to download video: " . $video_url);
        return;
    }

    file_put_contents($file_name, $video_content);
    $this->files[] = $baseName;
}
|
|
|
|
/**
 * Tell whether a file with this name has already been downloaded.
 *
 * The in-memory index ($this->files) is checked first; when it has no match,
 * ImageRecord is queried for a row whose "name" equals $fileName.
 *
 * @param string $fileName Base file name (no directory part).
 * @return bool True when the name is known locally or in the database,
 *              false otherwise.
 */
private function checkFileHasDownload($fileName) {
    // Strict comparison: file names must match exactly, without PHP's loose
    // numeric-string juggling.
    if (in_array($fileName, $this->files, true)) {
        Log::info("$fileName exist in local files");
        return true;
    }

    $record = ImageRecord::where("name", $fileName)->first();
    if ($record !== null) {
        Log::info("$fileName exist in db");
        return true;
    }

    // Explicit negative result instead of an implicit null.
    return false;
}
|
|
|
|
/**
 * Placeholder for comment scraping: the body is empty, so the call does
 * nothing and returns null.
 *
 * @param mixed $content Currently ignored.
 * @return void
 */
public function scrapeWeiboComments($content) {
    // Intentionally empty — not implemented.
}
|
|
|
|
}
|