You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 

349 lines
15 KiB

<?php
namespace App\Services;
use App\ImageRecord;
use GuzzleHttp\Client;
use GuzzleHttp\Psr7\Request;
use Illuminate\Support\Arr;
use Illuminate\Support\Facades\Log;
use Throwable;
// Process-wide runtime settings, applied as a side effect of loading this file.

// Use UTC as the default timezone for all date/time functions.
date_default_timezone_set('UTC');

// '-1' removes the memory ceiling for this process.
ini_set('memory_limit', '-1');

// 0 disables the maximum execution time.
set_time_limit(0);
class WeiboService
{
private $files = [];
private $videoDir = "/Volumes/T7/Image/weibo/video/";
private $imageDir = "/Volumes/T7/Image/weibo/image/";
/**
 * Seed the in-memory list of known files from the image and video directories.
 *
 * The image directory is scanned first, then the video directory; the results are
 * appended to $this->files in that order.
 *
 * NOTE(review): scanFilesWithoutPath() is defined outside this file; presumably it
 * returns the bare file names found in a directory - confirm.
 */
public function __construct()
{
    foreach ([$this->imageDir, $this->videoDir] as $directory) {
        $this->files = array_merge($this->files, scanFilesWithoutPath($directory));
    }
}
/**
 * Download the pictures and videos referenced by a Weibo "cards" response.
 *
 * $content is decoded as JSON and its `data.cards` list is walked. Cards without an
 * `mblog` entry are skipped. For every remaining post the first matching case wins:
 *   1. the post has `pics`                 -> process_pic() on them;
 *   2. the post has `retweeted_status`     -> process_pic() on its `pics`, if any;
 *   3. the post has `page_info.media_info` -> process_video() on the first non-empty
 *      URL found in `page_info.urls`.
 *
 * An exception raised while handling one card is logged and does not prevent the
 * remaining cards from being processed.
 *
 * @param string $content Raw JSON response body. An empty value ends the script
 *                        with the output "empty".
 * @return bool Always true.
 */
public function scrapeWeiboPicAndVideo($content)
{
    $video_dir = "/Volumes/T7/Image/weibo/video/";
    $image_dir = "/Volumes/T7/Image/weibo/image/";

    if (!(strlen($content) > 0)) {
        // Kept as-is for existing callers: the script ends and "empty" is printed.
        die('empty');
    }

    $decoded_json = json_decode($content, true);
    $cards = Arr::get($decoded_json, 'data.cards');
    if (!is_array($cards)) {
        // Invalid JSON or an unexpected payload shape: nothing to iterate over.
        Log::error("scrapeWeiboPicAndVideo: response has no data.cards list");
        return true;
    }

    foreach ($cards as $card) {
        if (!is_array($card) || !array_key_exists("mblog", $card)) {
            continue;
        }
        $mblog = $card['mblog'];
        if (!is_array($mblog)) {
            continue;
        }

        // One try/catch per card so a single bad post cannot drop the rest of the batch.
        try {
            if (array_key_exists("pics", $mblog)) {
                Log::info("-------enter 1");
                $user = Arr::get($mblog, "user", []);
                $text = Arr::get($mblog, "text", '');
                $this->process_pic($mblog['pics'], $image_dir, $user, $text);
            } elseif (array_key_exists("retweeted_status", $mblog)) {
                Log::info("-------enter 2");
                $retweeted = $mblog["retweeted_status"];
                if (is_array($retweeted) && array_key_exists("pics", $retweeted)) {
                    $user = Arr::get($retweeted, "user", []);
                    $text = Arr::get($retweeted, "text", '');
                    $this->process_pic($retweeted['pics'], $image_dir, $user, $text);
                }
            } elseif (
                array_key_exists("page_info", $mblog)
                && is_array($mblog["page_info"])
                && array_key_exists("media_info", $mblog["page_info"])
            ) {
                Log::info("-------enter 3");
                $page_info = $mblog["page_info"];
                // "urls" is optional here: only "media_info" was checked above.
                $media_urls = Arr::get($page_info, "urls", []);

                $video_url = (string) Arr::get($media_urls, "mp4_720p_mp4", "");
                Log::info("video_urls: " . $video_url);

                // Fall back to lower qualities, in order, until a URL is found.
                // NOTE(review): "mp4_hd_mp4" was added because the sibling keys in
                // "urls" all end in "_mp4", which makes "mp4_hd_url" look like a
                // typo; the original key is still tried. Confirm against a payload.
                foreach (["mp4_hd_mp4", "mp4_hd_url", "mp4_ld_mp4"] as $fallback_key) {
                    if ($video_url !== "") {
                        break;
                    }
                    $video_url = (string) Arr::get($media_urls, $fallback_key, "");
                }

                if ($video_url !== "") {
                    $this->process_video($video_url, $video_dir, Arr::get($page_info, "content2", ""));
                }
            }
        } catch (\Exception $e) {
            Log::error($e);
            Log::error($e->getTraceAsString());
        }
    }

    return true;
}
/**
 * Download the pictures and videos referenced by a Weibo group-timeline response.
 *
 * $content is decoded as JSON and its `data.statuses` list is walked; each entry is
 * treated as the post itself. For every post the first matching case wins:
 *   1. the post has `pics`                 -> process_pic() on them;
 *   2. the post has `retweeted_status`     -> process_pic() on its `pics`, if any;
 *   3. the post has `page_info.media_info` -> process_video() on the first non-empty
 *      URL found in `media_info`.
 *
 * An exception raised while handling one post is logged and does not prevent the
 * remaining posts from being processed. Nothing is written to the response: the
 * earlier debug dumps of each post were removed.
 *
 * NOTE(review): the target directories below differ from the ones scanned by the
 * constructor, so the in-memory file list may not reflect them - confirm intent.
 *
 * @param string $content Raw JSON response body. An empty value ends the script
 *                        with the output "empty".
 * @return bool Always true.
 */
public function scrapeGroupWeiboPicAndVideo($content)
{
    $video_dir = "/Volumes/Crucial X6/Image/weibo/video/";
    $image_dir = "/Volumes/Crucial X6/Image/weibo/image/";

    if (!(strlen($content) > 0)) {
        // Kept as-is for existing callers: the script ends and "empty" is printed.
        die('empty');
    }

    $decoded_json = json_decode($content, true);
    $statuses = Arr::get($decoded_json, 'data.statuses');
    if (!is_array($statuses)) {
        // Invalid JSON or an unexpected payload shape: nothing to iterate over.
        Log::error("scrapeGroupWeiboPicAndVideo: response has no data.statuses list");
        return true;
    }

    foreach ($statuses as $mblog) {
        if (!is_array($mblog)) {
            continue;
        }

        // One try/catch per post so a single bad entry cannot drop the rest of the batch.
        try {
            if (array_key_exists("pics", $mblog)) {
                $user = Arr::get($mblog, "user", []);
                $text = Arr::get($mblog, "text", '');
                $this->process_pic($mblog['pics'], $image_dir, $user, $text);
            } elseif (array_key_exists("retweeted_status", $mblog)) {
                $retweeted = $mblog["retweeted_status"];
                if (is_array($retweeted) && array_key_exists("pics", $retweeted)) {
                    $user = Arr::get($retweeted, "user", []);
                    $text = Arr::get($retweeted, "text", '');
                    $this->process_pic($retweeted['pics'], $image_dir, $user, $text);
                }
            } elseif (
                array_key_exists("page_info", $mblog)
                && is_array($mblog["page_info"])
                && array_key_exists("media_info", $mblog["page_info"])
            ) {
                $page_info = $mblog["page_info"];
                $media_info = $page_info["media_info"];

                // Try the available qualities from best to worst.
                $video_url = "";
                foreach (["mp4_720p_mp4", "mp4_hd_url", "mp4_sd_url"] as $quality_key) {
                    $video_url = (string) Arr::get($media_info, $quality_key, "");
                    if ($video_url !== "") {
                        break;
                    }
                }

                if ($video_url !== "") {
                    $this->process_video($video_url, $video_dir, Arr::get($page_info, "content2", ""));
                }
            }
        } catch (\Exception $e) {
            Log::error($e);
            Log::error($e->getTraceAsString());
        }
    }

    return true;
}
/**
 * Current Unix timestamp in seconds, including the fractional (microsecond) part.
 *
 * Declared without a visibility keyword, so PHP treats it as public.
 *
 * @return float
 */
function microtime_float()
{
    // microtime(true) returns the float directly; no need to split and re-add the
    // "usec sec" string form.
    return microtime(true);
}
/**
 * Download the pictures of one post into $subDir.
 *
 * For each picture the "large" rendition is used when present, otherwise the picture
 * entry itself. Pictures whose height/width ratio exceeds 15 are skipped. The target
 * file name is "<screen_name>--<url file name>.<ext>" (without the prefix when the
 * user has no screen_name). A picture is skipped when that file already exists in
 * $subDir or checkFileHasDownload() reports it as known. Download errors are logged
 * and do not stop the remaining pictures.
 *
 * Declared without a visibility keyword, so PHP treats it as public.
 *
 * @param array  $pics   Picture entries of a post.
 * @param string $subDir Directory the files are written to; created if missing.
 * @param array  $user   User entry of the post; only "screen_name" is read.
 * @param string $text   Post text (currently unused).
 * @return void
 */
function process_pic($pics, $subDir, $user, $text)
{
    if (!is_array($pics)) {
        return;
    }
    if (!file_exists($subDir)) {
        // Recursive, so a missing parent directory does not make the call fail.
        mkdir($subDir, 0777, true);
    }

    $user_name = Arr::get($user, "screen_name", '');
    $targetDir = rtrim($subDir, '/\\');

    foreach ($pics as $pic) {
        if (!is_array($pic)) {
            continue;
        }
        $source = array_key_exists("large", $pic) ? $pic['large'] : $pic;
        $pic_url = $source['url'] ?? '';
        if (!is_string($pic_url) || $pic_url === '') {
            Log::error("picture entry without url, skipped");
            continue;
        }

        // The ratio is computed per picture. When no usable geometry is available it
        // stays 0, so the picture is never skipped because of a previous picture's ratio.
        $h2w = 0;
        $geo = $source['geo'] ?? null;
        if (is_array($geo)) {
            $width = $geo['width'] ?? 0;
            $height = $geo['height'] ?? 0;
            if (is_numeric($width) && is_numeric($height) && $width > 0) {
                $h2w = $height / $width;
            }
        }
        if ($h2w > 15) {
            continue;
        }

        $picName = pathinfo($pic_url, PATHINFO_FILENAME);
        $picExt = pathinfo($pic_url, PATHINFO_EXTENSION);
        if ($user_name != '') {
            $picName = $user_name . '--' . $picName;
        }
        $baseName = $picName . "." . $picExt;
        $file_name = $targetDir . DIRECTORY_SEPARATOR . $baseName;

        if (file_exists($file_name) || $this->checkFileHasDownload($baseName)) {
            Log::info("$baseName file exists");
            continue;
        }

        try {
            $pic_content = (string) $this->downloadImg($pic_url);
            if ($pic_content === '') {
                // Do not leave a zero-byte file behind: it would block later retries.
                Log::error("empty response for $pic_url");
                continue;
            }
            if (file_put_contents($file_name, $pic_content) === false) {
                Log::error("failed to write $file_name");
                continue;
            }
            // Remember the file only after it was written successfully.
            $this->files[] = $baseName;
        } catch (Throwable $e) {
            Log::error($e->getMessage());
        }
    }
}
/**
 * Fetch an image with browser-like request headers and return the response body.
 *
 * The request is a GET sent through Guzzle with a fixed header set; the
 * "authority" header is filled with the host part of $url and the referer is
 * https://m.weibo.cn/.
 *
 * @param string $url Absolute image URL.
 * @return mixed The value of getBody() on the response (a PSR-7 body stream when
 *               Guzzle's standard response class is used).
 */
public function downloadImg($url)
{
    $parts = parse_url($url);

    $requestHeaders = [
        'authority' => $parts["host"],
        'accept' => 'image/avif,image/webp,image/apng,image/svg+xml,image/*,*/*;q=0.8',
        'accept-language' => 'zh-CN,zh;q=0.9',
        'cache-control' => 'no-cache',
        'pragma' => 'no-cache',
        'referer' => 'https://m.weibo.cn/',
        'sec-ch-ua' => '"Google Chrome";v="111", "Not(A:Brand";v="8", "Chromium";v="111"',
        'sec-ch-ua-mobile' => '?0',
        'sec-ch-ua-platform' => '"macOS"',
        'sec-fetch-dest' => 'image',
        'sec-fetch-mode' => 'no-cors',
        'sec-fetch-site' => 'cross-site',
        'user-agent' => 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36'
    ];

    $httpClient = new Client();
    $pending = $httpClient->sendAsync(new Request('GET', $url, $requestHeaders));

    // Block until the asynchronous request has completed.
    $response = $pending->wait();

    return $response->getBody();
}
/**
 * Download one video into $subdir.
 *
 * The file name is "<video_name>--<origin>", where <origin> is the URL path with the
 * leading slash dropped, "stream/" replaced by "--" and the remaining slashes
 * removed, and <video_name> is $video_name with any URL-like text and slashes
 * removed. The download is skipped when the file already exists in $subdir or
 * checkFileHasDownload() reports it as known.
 *
 * When the URL carries an "Expires" query parameter that lies in the past, the
 * error is logged and abort(404) is called.
 *
 * Declared without a visibility keyword, so PHP treats it as public.
 *
 * @param string $video_url  Absolute video URL.
 * @param string $subdir     Directory the file is written to; created if missing.
 * @param string $video_name Human-readable title used as file name prefix.
 * @return void
 */
function process_video($video_url, $subdir, $video_name)
{
    Log::info("video_url: " . $video_url);
    if (!file_exists($subdir)) {
        // Recursive, so a missing parent directory does not make the call fail.
        mkdir($subdir, 0777, true);
    }

    $url_params = parse_url($video_url);
    $params = [];
    // "query" and "path" are optional parts of a URL; default to empty strings.
    parse_str((string) ($url_params["query"] ?? ''), $params);

    $video_origin_name = (string) substr((string) ($url_params["path"] ?? ''), 1);
    $video_origin_name = str_replace("stream/", "--", $video_origin_name);
    $video_origin_name = str_replace("/", "", $video_origin_name);

    $video_name = (string) $video_name;
    $stripped_name = preg_replace("/(http|https|ftp)(.)*([a-z0-9\-\.\_])+/i", "", $video_name);
    if ($stripped_name !== null) {
        // preg_replace() returns null on a PCRE error; keep the raw name in that case.
        $video_name = $stripped_name;
    }
    $video_name = str_replace("/", "", $video_name);

    // time() is the current Unix timestamp. The previous date("y-m-d h:i:s") round
    // trip used the 12-hour clock without am/pm and was off by 12 hours for part
    // of every day.
    $now = time();
    // NOTE(review): URLs without an "Expires" parameter are treated as non-expiring.
    if (isset($params['Expires']) && $now > (int) $params['Expires']) {
        Log::error("视频有效期已过,now is " . $now .", Expires is ". $params['Expires']);
        abort(404);
    }

    $baseName = $video_name . "--" . $video_origin_name;
    $file_name = rtrim($subdir, '/\\') . "/" . $baseName;

    if (file_exists($file_name) || $this->checkFileHasDownload($baseName)) {
        Log::info("$baseName file exists");
        return;
    }

    $video_content = file_get_contents($video_url);
    if ($video_content === false) {
        // Do not write an empty file for a failed download.
        Log::error("failed to download video: " . $video_url);
        return;
    }
    if (file_put_contents($file_name, $video_content) === false) {
        Log::error("failed to write $file_name");
        return;
    }
    // Remember the file only after it was written successfully.
    $this->files[] = $baseName;
}
/**
 * Tell whether a file name is already known, either in the in-memory file list or
 * as an ImageRecord row whose "name" column equals it.
 *
 * @param string $fileName Base name of the file (no directory part).
 * @return bool True when the file is known, false otherwise.
 */
private function checkFileHasDownload($fileName)
{
    // Loose comparison is kept on purpose: the element type produced by
    // scanFilesWithoutPath() is not visible in this file.
    if (in_array($fileName, $this->files)) {
        Log::info("$fileName exist in local files");
        return true;
    }

    $record = ImageRecord::where("name", $fileName)->first();
    if ($record !== null) {
        Log::info("$fileName exist in db");
        return true;
    }

    // Explicit negative result instead of falling off the end (which returned null).
    return false;
}
/**
 * Placeholder with an empty body: calling it does nothing and yields null.
 *
 * @param mixed $content Not used.
 */
public function scrapeWeiboComments($content) {
}
}