From 97fbff92115b035435a14eae732210019a6bd7a6 Mon Sep 17 00:00:00 2001 From: shixuesen Date: Sun, 18 Jul 2021 23:20:34 +0800 Subject: [PATCH] add ffmpeg --- app/Console/Commands/ComicsScrape.php | 2 +- app/Console/Commands/CommonTest.php | 6 +- app/Services/FfmpegService.php | 223 ++++++++++++++++++ app/Services/InstagramService.php | 2 +- .../ReplaceCompressedVideoService.php | 4 +- app/Services/XiurenjiService.php | 53 ++++- app/Utils/FileUtils.php | 6 + fail.log | 3 + 8 files changed, 293 insertions(+), 6 deletions(-) create mode 100644 app/Services/FfmpegService.php diff --git a/app/Console/Commands/ComicsScrape.php b/app/Console/Commands/ComicsScrape.php index 9664fdb..d3c0083 100644 --- a/app/Console/Commands/ComicsScrape.php +++ b/app/Console/Commands/ComicsScrape.php @@ -39,7 +39,7 @@ class ComicsScrape extends Command public function handle() { $comic = new ComicsService(); -// $comic->scrapeAllAlbum("https://www.003004.com/papa"); + $comic->scrapeAllAlbum("https://www.003004.com/papa"); $comic->processHomeBusinessPeople("http://homebusinesspeople.com/detail/1829683o344697.html"); // } diff --git a/app/Console/Commands/CommonTest.php b/app/Console/Commands/CommonTest.php index 117f68d..4205788 100644 --- a/app/Console/Commands/CommonTest.php +++ b/app/Console/Commands/CommonTest.php @@ -3,6 +3,7 @@ namespace App\Console\Commands; use App\Services\CommonScrapeService; +use App\Services\FfmpegService; use App\Services\FileService; use App\Utils\FileUtils; use Illuminate\Console\Command; @@ -16,7 +17,7 @@ class CommonTest extends Command * * @var string */ - protected $signature = 'common:test'; + protected $signature = 'common:test {path}'; /** * The console command description. @@ -42,6 +43,9 @@ class CommonTest extends Command */ public function handle() { + $path = $this->argument("path"); + $service = new FfmpegService(); + $service->processDir(trim($path));exit; $cache = Cache::driver("redis"); $cache->tags(["aaa", "bbb"])->put("key1", "value1"); $cache->put("key2", "value2"); diff --git a/app/Services/FfmpegService.php b/app/Services/FfmpegService.php new file mode 100644 index 0000000..69ebc57 --- /dev/null +++ b/app/Services/FfmpegService.php @@ -0,0 +1,223 @@ +mediainfo = new MediaInfo(); + $this->mediainfo->setConfig('use_oldxml_mediainfo_output_format', true); + + } + + public function handleVideos($dir = "/Users/shixuesen/Documents/tmp/柚木/2017/泡泡条纹袜/") + { + $files = scandir($dir); + foreach ($files as $file) { + if ($file == "." || $file == "..") { + continue; + } + $subDir = implode("/", [$dir, $file]); + $isDir = is_dir($subDir); + if ($isDir) { + $subFiles = scandir($subDir); + foreach ($subFiles as $subFile) { + $subPathFile = implode("/", [$subDir, $subFile]); + if (is_dir($subPathFile) || $subFile == ".DS_Store") { + continue; + } + $mime = mime_content_type($subPathFile); +// dump("file type", [$mime, $subPathFile]); +// continue; + if (strstr($mime, "video/")) { + if (is_file($subPathFile)) { + $fileInfo = pathinfo($subPathFile); + dump("fileInfo", $fileInfo); + if (ends_with($fileInfo["filename"], "-1")) { + continue; + } + if (is_file($fileInfo["dirname"] . '/' .$fileInfo["filename"] . '-1'. '.' . $fileInfo["extension"])) { + unlink($subPathFile); + continue; + } + $targetFile = $fileInfo["dirname"] . '/' .$fileInfo["filename"] . '-1'. '.' . $fileInfo["extension"]; + dump("targetFile", [$targetFile]); +// $result = shell_exec("handBrakeCli -Z 'Very Fast 720p30' -i '". $subPathFile ."' -o '". $targetFile . " && echo 'success'"); + $result = shell_exec("handBrakeCli -Z 'Very Fast 720p30' -i '". $subPathFile ."' -o '". $targetFile . "'"); + dump($result); + + } + } + } + } + } + print_r($files); + } + +// public function processDir($baseDir = "/Volumes/WD/Video/HuaVid/") + public function processDir($baseDir = "/Volumes/Backup/HuaVid/大忽悠") + { + $files = scandir($baseDir); + foreach ($files as $file) { + if ($file == "." || $file == ".." || $file == ".DS_Store") { + continue; + } + $subDir = implode("/", [$baseDir, $file]); + $isDir = is_dir($subDir); + if ($isDir) { + $this->processDir($subDir); + } else { + $this->processVideo($subDir); + } + } + } + + public function processVideo($pathFile) + { + $mime = mime_content_type($pathFile); + $mediaInfo = new MediaInfo(); + $mediaInfo->setConfig('use_oldxml_mediainfo_output_format', true); + if (strstr($mime, "video/") || strstr($mime, "application/octet-stream")) { + if (is_file($pathFile)) { + if (!$this->checkFileSize($pathFile)) { + return; + } + $fileInfo = pathinfo($pathFile); + if (ends_with($fileInfo["filename"], "-x265")) { + return; + } + if (Redis::sismember("unneed", $fileInfo["filename"])) { + return; + } + $targetFile = $fileInfo["dirname"] . '/' .$fileInfo["filename"] . '-x265'. '.' . $fileInfo["extension"]; + if (is_file($targetFile)) { + unlink($pathFile); + rename($targetFile, $pathFile); + return; + } + dump("targetFile", [$targetFile]); + $result = shell_exec("ffmpeg -i '". $pathFile ."' -c:v libx265 -vtag hvc1 '" . $targetFile . "' && echo 'ok'"); + echo $result; + if ($result == "ok") { + echo "compress work done remove the file \n"; + $oldFileSize = filesize($pathFile); + $newFileSize = filesize($targetFile); + if ($newFileSize >= $oldFileSize) { + Redis::sadd("unneed", $fileInfo["filename"]); + echo "old file size is smaller than new one, old is " . file_size($oldFileSize) . " and new is " . file_size($newFileSize) . ", now remove new one"; + unlink($targetFile); + } else { + echo "new file size is smaller than old one, new is " . file_size($newFileSize) . " and old is " . file_size($oldFileSize) . ", now remove old one"; + unlink($pathFile); + rename($targetFile, $pathFile); + } + } + } + } + } + + public function processUnCompleteDir($baseDir = "/Volumes/WD/tmp/探花系列【AI高清2K修复】大合集") +// public function processDir($baseDir = "/Volumes/Backup/iPhone nPlayer/") + { + $files = scandir($baseDir); + foreach ($files as $file) { + if ($file == "." || $file == "..") { + continue; + } + $subDir = implode("/", [$baseDir, $file]); + $isDir = is_dir($subDir); + if ($isDir) { + $this->processUnCompleteDir($subDir); + } else { + $this->processUnCompleteVideo($subDir); + } + } + } + + public function processUnCompleteVideo($pathFile) + { + //... + $mediaInfo = new MediaInfo(); + $mediaInfo->setConfig('use_oldxml_mediainfo_output_format', true); + + $mime = mime_content_type($pathFile); +// dump("file type", [$mime, $subPathFile]); +// continue; + if (strstr($mime, "video/")) { + if (is_file($pathFile)) { + $fileInfo = pathinfo($pathFile); +// dump("fileInfo", $fileInfo); + if (ends_with($fileInfo["filename"], "-1")) { + return; + } + if (is_file($fileInfo["dirname"] . '/' .$fileInfo["filename"] . '-1'. '.' . $fileInfo["extension"])) { + $mediaInfoContainer1 = $mediaInfo->getInfo($fileInfo["dirname"] . '/' .$fileInfo["filename"] . '-1'. '.' . $fileInfo["extension"]); + $millSecond1 = $mediaInfoContainer1->getGeneral()->get("duration")->getMilliseconds(); + echo gettype($millSecond1) . "\n"; +// ["duration"] . "\n"; + $mediaInfoContainer = $mediaInfo->getInfo($pathFile); + $millSecond = $mediaInfoContainer->getGeneral()->get("duration")->getMilliseconds(); + echo gettype($millSecond) . "\n"; + if (abs(intval($millSecond) - intval($millSecond1)) > 100) { + echo $pathFile . "\n"; + echo abs(intval($millSecond) - intval($millSecond1)) . "\n"; + } + +// unlink($pathFile); + return; + } +// $targetFile = $fileInfo["dirname"] . '/' .$fileInfo["filename"] . '-1'. '.' . $fileInfo["extension"]; +// dump("targetFile", [$targetFile]); +//// $result = shell_exec("handBrakeCli -Z 'Very Fast 720p30' -i '". $subPathFile ."' -o '". $targetFile . " && echo 'success'"); +// $result = shell_exec("handBrakeCli -Z 'Very Fast 720p30' -i '". $pathFile ."' -o '". $targetFile . "'"); +// dump($result); + + } + } + } + + public function checkFileDimension($file) : bool + { + $mediaContainer = $this->mediainfo->getInfo($file); + foreach ($mediaContainer->getVideos() as $video) { + $height = $video->get('height')->getAbsoluteValue(); + $width = $video->get('width')->getAbsoluteValue(); + if ($height > $width && $width <= 720) { + echo "$file 分辨率小于 720p 跳过\n"; + return false; + } + if ($height <= $width && $height <= 720) { + echo "$file 分辨率小于 720p 跳过\n"; + return false; + } + + } + return true; + } + + public function checkFileSize($file, $size = 1): bool + { + if (is_file($file) && filesize($file) > 100 * 1024 * 1024) { + return true; + } + $fileSize = FileUtils::humanFilesize(filesize($file)); + echo "$file size < 100Mb filesize is $fileSize skip \n"; + return false; + } + + + + +} + diff --git a/app/Services/InstagramService.php b/app/Services/InstagramService.php index d62b1d3..2898909 100644 --- a/app/Services/InstagramService.php +++ b/app/Services/InstagramService.php @@ -308,7 +308,7 @@ class InstagramService try { $userList = file("/Users/shixuesen/OneDrive/Pictures/instagram/user.txt"); - $userList = array_slice($userList, $start, 50); + $userList = array_slice($userList, $start, 55); // $userList = ['1992.ai_']; // print_r($userList);exit; // print_r($userList); diff --git a/app/Services/ReplaceCompressedVideoService.php b/app/Services/ReplaceCompressedVideoService.php index 72570e1..fa4283d 100644 --- a/app/Services/ReplaceCompressedVideoService.php +++ b/app/Services/ReplaceCompressedVideoService.php @@ -2,6 +2,7 @@ namespace App\Services; +use Illuminate\Support\Facades\Log; use Illuminate\Support\Facades\Redis; use Mhor\MediaInfo\MediaInfo; @@ -106,7 +107,8 @@ class ReplaceCompressedVideoService } $fileInfo = pathinfo($pathFile); - dump("fileInfo", $fileInfo); + Log::info("big size fileInfo", $fileInfo); + return; if (ends_with($fileInfo["filename"], "-XXXXX#compressed")) { return; } diff --git a/app/Services/XiurenjiService.php b/app/Services/XiurenjiService.php index e8a1dcd..001efca 100644 --- a/app/Services/XiurenjiService.php +++ b/app/Services/XiurenjiService.php @@ -24,6 +24,7 @@ class XiurenjiService public function scrapeAlbum() { + echo "111"; $pageSize = 20; $pageCount = $this->getEncodeHtmlContent("https://www.xiurenji.cc/XiuRen/index.html")->find(".page span")->htmls()->get(0); print_r($pageCount); @@ -56,6 +57,7 @@ class XiurenjiService } public function scrapeSingleAlbum($url) { + Log::info("scrapeSingleAlbum $url"); $pageContent = $this->getEncodeHtmlContent($url); $pageSize = 3; $items = $pageContent->find(".ina p:nth-child(2)")->texts(); @@ -104,7 +106,7 @@ class XiurenjiService try { mkdir($dir); } catch (Exception $e) { - Log::error($e->getTraceAsString()); + Log::error($e->getMessage()); return; } } @@ -169,10 +171,57 @@ class XiurenjiService public function getEncodeHtmlContent($url) { $attempts = 0; $html = ""; + $arrContextOptions=array( + "ssl"=>array( + "allow_self_signed"=>true, + "verify_peer"=>false, + "verify_peer_name"=>false, + ), + ); do { try { - $html = iconv('gb2312','UTF-8//IGNORE',file_get_contents($url)); + $curl = curl_init(); + + curl_setopt_array($curl, array( + CURLOPT_URL => $url, + CURLOPT_RETURNTRANSFER => true, + CURLOPT_ENCODING => '', + CURLOPT_MAXREDIRS => 10, + CURLOPT_TIMEOUT => 0, + CURLOPT_FOLLOWLOCATION => true, + CURLOPT_HTTP_VERSION => CURL_HTTP_VERSION_1_1, + CURLOPT_CUSTOMREQUEST => 'GET', + CURLOPT_SSL_VERIFYPEER => false, + CURLOPT_SSL_VERIFYHOST => false, + + CURLOPT_HTTPHEADER => array( + 'authority: www.xiurenji.cc', + 'pragma: no-cache', + 'cache-control: no-cache', + 'sec-ch-ua: " Not;A Brand";v="99", "Google Chrome";v="91", "Chromium";v="91"', + 'sec-ch-ua-mobile: ?0', + 'upgrade-insecure-requests: 1', + 'user-agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.114 Safari/537.36', + 'accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9', + 'sec-fetch-site: same-origin', + 'sec-fetch-mode: navigate', + 'sec-fetch-user: ?1', + 'sec-fetch-dest: document', + 'referer: https://www.xiurenji.cc/XiuRen/', + 'accept-language: zh-CN,zh;q=0.9', + 'cookie: UM_distinctid=177fd93a0ca93c-06b94658d5d337-121a4759-13c680-177fd93a0cbcaf; ASPSESSIONIDCATDQACD=FDPMPCLAMHNCPJFCBLKFLCKH; CNZZDATA1278618868=367774893-1614867004-%7C1625926983; __51cke__=; __tins__20641871=%7B%22sid%22%3A%201625931982756%2C%20%22vd%22%3A%203%2C%20%22expires%22%3A%201625933829110%7D; __51laig__=7' + ), + )); + + $response = curl_exec($curl); + $error = curl_error($curl); + echo $error; + curl_close($curl); + +// echo $response; + $html = iconv('gb2312','UTF-8//IGNORE', $response); } catch (Exception $e) { + echo $e->getMessage(). "\n"; echo $e->getTraceAsString() . "\n"; $sleepTime = 10000 * random_int(100, 1000); echo "sleep {$sleepTime} nano second \n"; diff --git a/app/Utils/FileUtils.php b/app/Utils/FileUtils.php index 87c8164..ec7bda6 100644 --- a/app/Utils/FileUtils.php +++ b/app/Utils/FileUtils.php @@ -31,5 +31,11 @@ class FileUtils { } return $baseMap; } + + public static function humanFilesize($bytes, $decimals = 2) { + $sz = 'BKMGTP'; + $factor = floor((strlen($bytes) - 1) / 3); + return sprintf("%.{$decimals}f", $bytes / pow(1024, $factor)) . @$sz[$factor]; + } } diff --git a/fail.log b/fail.log index acb6616..5b5f882 100644 --- a/fail.log +++ b/fail.log @@ -2340,3 +2340,6 @@ /Users/shixuesen/OneDrive/Pictures/instagram/ayreen/ https://scontent-lax3-1.cdninstagram.com/v/t50.2886-16/10000000_508816636893040_1329537237971395618_n.mp4?efg=eyJ2ZW5jb2RlX3RhZyI6InZ0c192b2RfdXJsZ2VuLjcyMC5jbGlwcy5iYXNlbGluZSJ9&_nc_ht=scontent-lax3-1.cdninstagram.com&_nc_cat=104&_nc_ohc=XisTBYMEjw8AX-rzbId&edm=ABmJApABAAAA&vs=1431807383860924_2420627217&_nc_vs=HBksFQAYJEdJQ1dtQUJ3MV9VYXhNNEJBQ0prOE9BU2QzTVNicV9FQUFBRhUAAsgBABUAGCRHSm1SSlF6dzdIbFVMYlVBQUZCYkVfQmFrN0Z6YnFfRUFBQUYVAgLIAQAoABgAGwGIB3VzZV9vaWwBMBUAACaA7JX66%2BfFPxUCKAJDMywXQD4IcrAgxJwYEmRhc2hfYmFzZWxpbmVfMV92MREAdf4HAA%3D%3D&_nc_rid=3d82ebcf53&ccb=7-4&oe=60D49ABC&oh=232e66e71185a7be6cb9264dce920990&_nc_sid=6136e7 /Users/shixuesen/OneDrive/Pictures/instagram/ayreen/ https://scontent-lax3-2.cdninstagram.com/v/t50.2886-16/10000000_222401963043975_6831917900789398481_n.mp4?efg=eyJ2ZW5jb2RlX3RhZyI6InZ0c192b2RfdXJsZ2VuLjcyMC5jbGlwcy5iYXNlbGluZSJ9&_nc_ht=scontent-lax3-2.cdninstagram.com&_nc_cat=103&_nc_ohc=dCHA7uAi8XgAX8xJg1h&edm=ABmJApABAAAA&vs=157567773029968_2190380134&_nc_vs=HBksFQAYJEdJQ1dtQUNITEFIX1Jjb0FBTkU3THpzaTJjOWVicV9FQUFBRhUAAsgBABUAGCRHSTVVVVF4UERCb1lDemNDQVBlVE4wOWF0YXg5YnFfRUFBQUYVAgLIAQAoABgAGwGIB3VzZV9vaWwBMBUAACa8%2B6aw5NTKPxUCKAJDMywXQD4IcrAgxJwYEmRhc2hfYmFzZWxpbmVfMV92MREAdf4HAA%3D%3D&_nc_rid=30348e2d14&ccb=7-4&oe=60D80AF6&oh=79b4475b4300445e66dc20a31af1e8af&_nc_sid=6136e7 /Users/shixuesen/OneDrive/Pictures/instagram/cxxsomi/ https://scontent-lax3-2.cdninstagram.com/v/t51.2885-15/e35/209584820_3045368299031915_637368205992932357_n.jpg?se=7&tp=1&_nc_ht=scontent-lax3-2.cdninstagram.com&_nc_cat=111&_nc_ohc=hvt2zQipCgYAX-MONpx&edm=ABmJApABAAAA&ccb=7-4&oh=7731365b37c120f750cef683e1cd0b5d&oe=60E9AA4A&_nc_sid=6136e7&ig_cache_key=MjYwNzU0MjI0MTIwMTc5NjQzNQ%3D%3D.2-ccb7-4 +/Users/shixuesen/OneDrive/Pictures/instagram/stilleecho/ https://scontent-lax3-1.cdninstagram.com/v/t51.2885-15/e35/212095022_332286995264001_9059643095607844293_n.jpg?se=7&_nc_ht=scontent-lax3-1.cdninstagram.com&_nc_cat=109&_nc_ohc=68sk_4yGhaUAX90WyCU&edm=ABmJApABAAAA&ccb=7-4&oh=b954ef6e24cd1b8e8a7aacc8703d766f&oe=60F19A50&_nc_sid=6136e7&ig_cache_key=MjYxMzk1MDQ5MzcwMzkxNzI1Ng%3D%3D.2-ccb7-4 +/Users/shixuesen/OneDrive/Pictures/instagram/parlovetati/ https://scontent-lax3-2.cdninstagram.com/v/t50.2886-16/216324143_366793355069563_5612659884742046009_n.mp4?efg=eyJ2ZW5jb2RlX3RhZyI6InZ0c192b2RfdXJsZ2VuLjcyMC5mZWVkLmRlZmF1bHQifQ&_nc_ht=scontent-lax3-2.cdninstagram.com&_nc_cat=107&_nc_ohc=el8uErawf5IAX_R-T31&edm=ABmJApABAAAA&vs=17926477252650290_2881665326&_nc_vs=HBksFQAYJEdDLVk1QXg3aUtLN21FMEJBRGx4SXBOdExPUk5ia1lMQUFBRhUAAsgBABUAGCRHSWlaeUF4SXBGeTA2T1lBQU1XM25xR21acjFsYmtZTEFBQUYVAgLIAQAoABgAGwGIB3VzZV9vaWwBMBUAACbEmtmnx8fKQBUCKAJDMywXQE37peNT988YEmRhc2hfYmFzZWxpbmVfM192MREAdeoHAA%3D%3D&_nc_rid=9f56e19b21&ccb=7-4&oe=60EE70A7&oh=581f36639d36673496a4ebbf2cbb843b&_nc_sid=6136e7 +/Users/shixuesen/OneDrive/Pictures/instagram/piamodel/ https://scontent-lax3-1.cdninstagram.com/v/t50.2886-16/98921469_348244170087176_2673062320829551911_n.mp4?efg=eyJ2ZW5jb2RlX3RhZyI6InZ0c192b2RfdXJsZ2VuLjcyMC5zdG9yeS5kZWZhdWx0In0&_nc_ht=scontent-lax3-1.cdninstagram.com&_nc_cat=108&_nc_ohc=DzIwu7F07X8AX9lIR5s&edm=AOVtZ6oBAAAA&vs=18021368725308316_4178254570&_nc_vs=HBkcFQAYJEdQMXI1UVVJbXczcXVUd0JBQ2ZWaHE4RW9CZ2xidXFIQUFBQRUAAsgBACgAGAAbAYgHdXNlX29pbAExFQAAJprUncPK%2Bcw%2FFQIoAkMzLBdAFZmZmZmZmhgSZGFzaF9iYXNlbGluZV8xX3YxEQB16AcA&_nc_rid=083a1a13ca&ccb=7-4&oe=60EF5E37&oh=d2729ede6f6d58d3948973edf7077f7e&_nc_sid=bab638