From b8387bf90c9e6ef17488fdb9f86e6442195cd2a0 Mon Sep 17 00:00:00 2001 From: shixuesen Date: Mon, 21 Feb 2022 23:48:48 +0800 Subject: [PATCH] modify instagram and xiuren and so on --- .env.prod.back | 53 +++++ app/Console/Commands/AcfunScrape.php | 7 +- app/Console/Commands/BiliVideoCode.php | 19 +- app/Console/Commands/InstagramScrape.php | 1 + app/Console/Commands/RecordDir.php | 50 +++-- app/Console/Commands/RenameTest.php | 23 +- app/Console/Commands/XiurenjiScrape.php | 23 +- app/Console/Kernel.php | 41 ++-- app/Http/Controllers/WeiboController.php | 18 +- app/ImageRecord.php | 1 + app/Services/AcfunService.php | 10 +- app/Services/BilibiliServiceV2.php | 17 +- app/Services/FfmpegService.php | 15 +- app/Services/InstagramService.php | 97 ++++++++- app/Services/NewNvshenService.php | 2 + app/Services/RenameService.php | 31 ++- app/Services/WeiboService.php | 55 ++++- app/Services/XiaoyuService.php | 256 ++++++++++++++++++++++ app/Services/XiurenjiService.php | 260 +++++++++++++---------- app/Services/YouwuService.php | 256 ++++++++++++++++++++++ app/Utils/helper.php | 20 ++ fail.log | 3 + 22 files changed, 1048 insertions(+), 210 deletions(-) create mode 100644 .env.prod.back create mode 100644 app/Services/XiaoyuService.php create mode 100644 app/Services/YouwuService.php diff --git a/.env.prod.back b/.env.prod.back new file mode 100644 index 0000000..d023a3e --- /dev/null +++ b/.env.prod.back @@ -0,0 +1,53 @@ +APP_NAME=Laravel +APP_ENV=prod +APP_KEY=base64:BNE3RoAp39H4EB1jWx0c1xJI3arhkasdMAz52FsCc1U= +APP_DEBUG=true +APP_URL=http://localhost + +LOG_CHANNEL=stack + +DB_CONNECTION=mysql +#DB_HOST=127.0.0.1 +#DB_PORT=3306 +#DB_DATABASE=ins +#DB_USERNAME=root +#DB_PASSWORD=Mff@0987654321 +DB_HOST=111.231.219.223 +DB_PORT=3306 +DB_DATABASE=ins +DB_USERNAME=ns +DB_PASSWORD=949sxs949 + +BROADCAST_DRIVER=log +CACHE_DRIVER=redis +QUEUE_CONNECTION=sync +SESSION_DRIVER=file +SESSION_LIFETIME=120 + +REDIS_HOST=127.0.0.1 +REDIS_PASSWORD=null +REDIS_PORT=6379 + +MAIL_DRIVER=smtp +MAIL_HOST=smtp.mailtrap.io +MAIL_PORT=2525 +MAIL_USERNAME=null +MAIL_PASSWORD=null +MAIL_ENCRYPTION=null + +PUSHER_APP_ID= +PUSHER_APP_KEY= +PUSHER_APP_SECRET= +PUSHER_APP_CLUSTER=mt1 + +MIX_PUSHER_APP_KEY="${PUSHER_APP_KEY}" +MIX_PUSHER_APP_CLUSTER="${PUSHER_APP_CLUSTER}" + +OAUTH_APP_ID=89b50ce9-fcd4-4d6b-a830-bbfe0fa5703b +OAUTH_APP_PASSWORD=gmwsDOF38_+ycvIDAH885[) +OAUTH_REDIRECT_URI=http://localhost:8000/callback +#OAUTH_SCOPES='openid profile offline_access user.read calendars.read Files.Read.All, Files.ReadWrite.All, Sites.Read.All, Sites.ReadWrite.All' +OAUTH_SCOPES='openid profile offline_access user.read calendars.read' +OAUTH_AUTHORITY=https://login.microsoftonline.com/common +OAUTH_AUTHORIZE_ENDPOINT=/oauth2/v2.0/authorize +OAUTH_TOKEN_ENDPOINT=/oauth2/v2.0/token diff --git a/app/Console/Commands/AcfunScrape.php b/app/Console/Commands/AcfunScrape.php index ce4a201..f52f1b7 100644 --- a/app/Console/Commands/AcfunScrape.php +++ b/app/Console/Commands/AcfunScrape.php @@ -38,9 +38,12 @@ class AcfunScrape extends Command */ public function handle() { + // 几兔灰 10703951 + // 香菜猫饼 4537972 + // 小清晨儿 2277346 $service = new AcfunService(); -// $service->requestUpPageApi(4537972);exit; -// $service->queryUpUsersVideos(4537972);exit; + // $service->requestUpPageApi(10703951);exit; + // $service->queryUpUsersVideos(2277346);exit; $service->downloadVideo(); // } diff --git a/app/Console/Commands/BiliVideoCode.php b/app/Console/Commands/BiliVideoCode.php index 0461b07..1af2dc2 100644 --- a/app/Console/Commands/BiliVideoCode.php +++ b/app/Console/Commands/BiliVideoCode.php @@ -46,19 +46,20 @@ class BiliVideoCode extends Command // dump($this->arguments());exit; // $bilibili = new BilibiliServiceV2(new BilibiliVideoRepository(App::getFacadeApplication())); -// $bilibili->checkVideoHasDownload(); + // $bilibili->checkVideoHasDownload(); -// $bilibili->queryLocalUpVideoList();exit; -// $bilibili->insertDBTest();exit; -// $bilibili->queryPlayList(); + // $bilibili->queryLocalUpVideoList();exit; + // $bilibili->insertDBTest();exit; + // $bilibili->queryPlayList(); - $bilibili->queryUpVideoList(10278125); -// exit; + // $bilibili->queryUpVideoList(10278125); + // exit; - $bilibili->queryDBCollectionList(); - // $bilibili->compareAndDownloadUpVideos(true); + // $bilibili->queryDBCollectionList(); +// $bilibili->queryForVideoParts(); + $bilibili->compareAndDownloadUpVideos(true); // $bilibili->compareAndDownloadCollectionVideos(); -// exit; + exit; $bilibili->queryForVideoParts(); if (App::environment() == "local") { # code... diff --git a/app/Console/Commands/InstagramScrape.php b/app/Console/Commands/InstagramScrape.php index 0e9849d..e5c9e62 100644 --- a/app/Console/Commands/InstagramScrape.php +++ b/app/Console/Commands/InstagramScrape.php @@ -51,6 +51,7 @@ class InstagramScrape extends Command // print_r($userList);exit; $ins = new InstagramService(); // $ins->getUserNameById('4156629214');exit; + $ins->scrapeFeeds(); $ins->scrapeUsers($start); $ins->scrapeLikedUsers(); $ins->scrapeCollection();exit; diff --git a/app/Console/Commands/RecordDir.php b/app/Console/Commands/RecordDir.php index 0be3f0b..b8721a7 100644 --- a/app/Console/Commands/RecordDir.php +++ b/app/Console/Commands/RecordDir.php @@ -41,33 +41,51 @@ class RecordDir extends Command */ public function handle() { - // $path = trim($this->argument("path")); $service = new DirService(); $list = $service->recursiveScan($path); -// $i = 0; - foreach ($list["files"] as $file) { - if (strstr($file, ".DS_Store")) { + if (isset($list["files"]) && count($list["files"]) > 0) { + foreach ($list["files"] as $file) { + if (strstr($file, ".DS_Store")) { + continue; + } + $fileInfo = pathinfo($file); + try { + $innerPath = str_replace($path, "", $fileInfo["dirname"]); + $innerName = $fileInfo["basename"]; + ImageRecord::firstOrCreate(["path" => $innerPath, "name" => $innerName], + ["path" => $innerPath, + "name" => $innerName, + "type" => 2 + ] + ); + } catch (QueryException $e) { + if (!str_contains($e->getMessage(), "Duplicate entry")) { + Log::error($e->getMessage()); + } + } + unset($imageRecord); + } + } + + + foreach ($list["dirs"] as $dir) { + if (strstr($dir, ".DS_Store") || $dir == ".." || $dir == "." || str_starts_with($dir, ".")) { continue; } - $fileInfo = pathinfo($file); try { - $imageRecord = new ImageRecord(); - $imageRecord->path = str_replace($path, "", $fileInfo["dirname"]); - $imageRecord->name = $fileInfo["basename"]; - $imageRecord->type = 2;// dump($imageRecord->getAttributes()); - // $i++; - // if ($i > 100) { - // exit; - // } - // continue; - $imageRecord->save(); + ImageRecord::firstOrCreate(["path" => $path, "name" => $dir], + ["path" => $path, + "name" => $dir, + "type" => 1 + ] + ); + } catch (QueryException $e) { if (!str_contains($e->getMessage(), "Duplicate entry")) { Log::error($e->getMessage()); } } - unset($imageRecord); } } diff --git a/app/Console/Commands/RenameTest.php b/app/Console/Commands/RenameTest.php index 6fe83b3..876951c 100644 --- a/app/Console/Commands/RenameTest.php +++ b/app/Console/Commands/RenameTest.php @@ -4,6 +4,7 @@ namespace App\Console\Commands; use App\Services\RenameService; use Illuminate\Console\Command; +use function RingCentral\Psr7\str; class RenameTest extends Command { @@ -42,9 +43,27 @@ class RenameTest extends Command $path = $this->argument("path"); $prefix = $this->argument("prefix"); $rename = new RenameService(); - dump($rename->splitCustomSizeOfFolder($path, $prefix, 500));exit; +// $files = scandir($path); +// foreach ($files as $file) { +// if (str_contains($file, "KID=imgbed,photo&")) { +// $pos = strpos($file, ".jpg"); +// $newFileName = substr($file, 0, $pos + 4); +// dump("new filename ". $newFileName); +// +// rename($path . DIRECTORY_SEPARATOR . $file, $path . DIRECTORY_SEPARATOR . $newFileName); +// } +// } +// exit; + // $rename->rename($path, $prefix); +// $arr[0] = 1640486381; +// $arr[1] = 1640553754; +// $arr[2] = 1640488544; +// asort($arr); +// dump($arr);exit; + // $rename->rename($path, $prefix);exit; + $rename->splitCustomSizeOfFolder($path, $prefix, 500);exit; // $rename->rename("/Volumes/WD/tmp/写真图/猫九", "猫九-"); - $rename->rename($path, $prefix); + return; $rename->rename("/Volumes/Backup/images/写真/pcBack/3/", ""); // $rename->rename(); diff --git a/app/Console/Commands/XiurenjiScrape.php b/app/Console/Commands/XiurenjiScrape.php index ab8e41f..6b30082 100644 --- a/app/Console/Commands/XiurenjiScrape.php +++ b/app/Console/Commands/XiurenjiScrape.php @@ -2,7 +2,9 @@ namespace App\Console\Commands; +use App\Services\XiaoyuService; use App\Services\XiurenjiService; +use App\Services\YouwuService; use Illuminate\Console\Command; class XiurenjiScrape extends Command @@ -12,7 +14,7 @@ class XiurenjiScrape extends Command * * @var string */ - protected $signature = 'xiuren:s'; + protected $signature = 'xiuren:s {all} {site} {num} {start}'; /** * The console command description. @@ -39,8 +41,25 @@ class XiurenjiScrape extends Command public function handle() { // + $isAll = $this->argument('all'); + $site = $this->argument("site"); + $num = $this->argument("num"); + $start = $this->argument("start"); + // if ($site == "xiuren") { $service = new XiurenjiService(); + // } else if ($site == "xiaoyu") { + // $service = new XiaoyuService(); + // } else if ($site == "youwu"){ + // $service = new YouwuService(); + // } + + if ($isAll == "1") { + $service->scrapeAll(); + } else { + $service->scrapeAlbum($site, $num, $start); + } + // $service->scrapeSingleAlbum("https://www.xiurenji.vip/XiuRen/7828.html");exit; - $service->scrapeAlbum(); + } } diff --git a/app/Console/Kernel.php b/app/Console/Kernel.php index 887270b..51b0ef9 100644 --- a/app/Console/Kernel.php +++ b/app/Console/Kernel.php @@ -2,7 +2,6 @@ namespace App\Console; -use App; use App\Services\BilibiliService; use App\Services\BilibiliServiceV2; use App\Services\CompressImageService; @@ -13,10 +12,12 @@ use App\Services\TujiguService; use App\Services\XiuGirlsService; use App\Services\VitabioticsService; use App\Services\YouKnowService; +use App\Repositories\BilibiliVideoRepository; use Illuminate\Console\Scheduling\Schedule; use Illuminate\Foundation\Console\Kernel as ConsoleKernel; use App\Services\InstagramService; use Illuminate\Support\Facades\Log; +use Illuminate\Support\Facades\App; date_default_timezone_set('PRC'); @@ -40,44 +41,44 @@ class Kernel extends ConsoleKernel protected function schedule(Schedule $schedule) { $schedule->call(function () { - if ((rand(0, 99) % 2) == 0) { - exit(); - } - if(date('H') % 4 != 0) { - Log::info("schedule queryUpVideoList skipped at: " .date("Y-m-d H:i:s")); - exit; - } - $bilibili = new BilibiliServiceV2(); + if ((rand(0, 99) % 2) == 0) { + exit(); + } + if(date('H') % 3 != 0) { + Log::info("schedule queryUpVideoList skipped at: " .date("Y-m-d H:i:s")); + exit; + } + $bilibili = new BilibiliServiceV2(new BilibiliVideoRepository(App::getFacadeApplication())); Log::info("schedule queryUpVideoList started at: ". date("Y-m-d H:i:s")); $bilibili->queryUpVideoList(); })->hourlyAt(5); // })->yearly(); $schedule->call(function () { - if ((rand(0, 99) % 2) == 1) { - exit(); - } - if(date('H') % 4 != 0) { - Log::info("schedule queryDBCollectionList skipped at: " .date("Y-m-d H:i:s")); - exit; - } - $bilibili = new BilibiliServiceV2(); + if ((rand(0, 99) % 2) == 1) { + exit(); + } + if(date('H') % 3 != 0) { + Log::info("schedule queryDBCollectionList skipped at: " .date("Y-m-d H:i:s")); + exit; + } + $bilibili = new BilibiliServiceV2(new BilibiliVideoRepository(App::getFacadeApplication())); Log::info("schedule queryDBCollectionList started at: ". date("Y-m-d H:i:s")); $bilibili->queryDBCollectionList(); })->hourlyAt(35); // })->yearly(); $schedule->call(function () { - $bilibili = new BilibiliServiceV2(); + $bilibili = new BilibiliServiceV2(new BilibiliVideoRepository(App::getFacadeApplication())); Log::info("schedule queryForVideoParts started at: ". date("Y-m-d H:i:s")); $bilibili->queryForVideoParts(); })->dailyAt("02:00"); $schedule->call(function () { - $bilibili = new BilibiliServiceV2(); + $bilibili = new BilibiliServiceV2(new BilibiliVideoRepository(App::getFacadeApplication())); Log::info("schedule compareAndDownloadUpVideos started at: ". date("Y-m-d H:i:s")); // $bilibili->compareAndDownloadUpVideos(); })->dailyAt('03:00'); $schedule->call(function () { - $bilibili = new BilibiliServiceV2(); + $bilibili = new BilibiliServiceV2(new BilibiliVideoRepository(App::getFacadeApplication())); Log::info("schedule compareAndDownloadCollectionVideos started at: ". date("Y-m-d H:i:s")); // $bilibili->compareAndDownloadCollectionVideos(); })->dailyAt('04:00'); diff --git a/app/Http/Controllers/WeiboController.php b/app/Http/Controllers/WeiboController.php index aa0d021..bc28b88 100644 --- a/app/Http/Controllers/WeiboController.php +++ b/app/Http/Controllers/WeiboController.php @@ -30,6 +30,12 @@ class WeiboController extends Controller // https://m.weibo.cn/api/container/getIndex?containerid=230259&openApp=0&page= // 物理 2304133907143723 + // 230259 自己 + // 轮子哥 2304131916825084 + // 徐圣佑 5893812490 + // 徐圣佑- 新号 1076035893812490 + + $url = array(); @@ -38,11 +44,11 @@ class WeiboController extends Controller // $url[] = 'https://m.weibo.cn/api/container/getIndex?containerid=1076035893812490&openApp=0&page='.$i; // for( $i = 100; $i >= 1 ; $i-- ) // $url[] = 'https://m.weibo.cn/api/container/getIndex?containerid=2304133907143723&openApp=0&page='.$i; - // for ($i = 5; $i >= 1; $i--) { - // $url[] = 'https://m.weibo.cn/feed/group?gid=4423532052076817&&page=' . $i; - // } - for ($i = 1; $i < 10; $i++) { - $url[] = 'https://m.weibo.cn/api/container/getIndex?containerid=230259&&page=' . $i; + // for ($i = 10; $i >= 1; $i--) { + // $url[] = 'https://m.weibo.cn/feed/group?gid=4423532052076817&&page=' . $i; + // } + for ($i = 5; $i >= 1; $i--) { + $url[] = 'https://m.weibo.cn/api/container/getIndex?containerid=2304131916825084&page=' . $i; } return response()->json($url); } @@ -69,7 +75,7 @@ class WeiboController extends Controller // Log::info($request->input()); $weibo = new WeiboService(); $result = $weibo->scrapeWeiboPicAndVideo($request->input("content")); -// $result = $weibo->scrapeGroupWeiboPicAndVideo($request->input("content")); + // $result = $weibo->scrapeGroupWeiboPicAndVideo($request->input("content")); return response()->json($result); } diff --git a/app/ImageRecord.php b/app/ImageRecord.php index a1dcadf..f800fc3 100644 --- a/app/ImageRecord.php +++ b/app/ImageRecord.php @@ -27,4 +27,5 @@ use Illuminate\Database\Eloquent\Model; class ImageRecord extends Model { // + protected $guarded = ['']; } diff --git a/app/Services/AcfunService.php b/app/Services/AcfunService.php index 96df74b..34ae7df 100644 --- a/app/Services/AcfunService.php +++ b/app/Services/AcfunService.php @@ -128,7 +128,7 @@ class AcfunService public function downloadVideo() { $innerDir = "/Volumes/Crucial X6/Video/acfun"; - $list = AcfunVideo::all(); + $list = AcfunVideo::orderBy('created_at', 'desc')->get(); $files = scandir($innerDir); // dump($files); @@ -142,7 +142,7 @@ class AcfunService } Log::info($item['title'] . " (" . $item["from_up_name"] . ").mp4" . " does not exists to download"); // exit; - $downloadResult = shell_exec('cd "' .$innerDir .'" && annie https://www.acfun.cn/v/ac' . $item["content_id"]); + $downloadResult = shell_exec('cd "' .$innerDir .'" && you-get https://www.acfun.cn/v/ac' . $item["content_id"]); Log::info($downloadResult); $item["is_downloaded"] = 1; $item->save(); @@ -166,13 +166,13 @@ class AcfunService "title" => $titles[$key], "from_type" => 2, "from_collection_name" => "", - "from_up_name" => "香菜猫饼", + "from_up_name" => "几兔灰", "from_up_user_id" => $upId ]); } $queried += $result["pageSize"]; Log::info("current queried: " . $queried); - if ($result["noMore"] || $queried > (int)$result["totalCount"]) { + if (array_key_exists("noMore", $result) || $queried > (int)$result["totalCount"]) { break; } $pCursor = $result["pcursor"]; @@ -218,7 +218,7 @@ class AcfunService } - public function requestUpPageApi($upId, $pcursor = "") { +public function requestUpPageApi($upId, $pcursor = "") { if ($upId == null) { return ""; } diff --git a/app/Services/BilibiliServiceV2.php b/app/Services/BilibiliServiceV2.php index ee56bf2..9a6ba87 100644 --- a/app/Services/BilibiliServiceV2.php +++ b/app/Services/BilibiliServiceV2.php @@ -28,7 +28,10 @@ class BilibiliServiceV2 private $baseDir = "/Volumes/intel660p/video/mv/"; - private $remoteDir = "/data/"; + // private $remoteDir = "/data/"; + private $remoteDir = "/Volumes/Crucial X6/Video/"; + + protected $repository; @@ -250,8 +253,9 @@ class BilibiliServiceV2 public function compareAndDownloadUpVideos($isAll = false) { $env = App::environment(); - $list = BilibiliUpVideos::all(); -// $list = array_slice($list->all(), 15, 5); + $list = BilibiliUpVideos::orderBy("created_at", "desc")->get(); + $list = array_slice($list->all(), 0, 2); + // dump($list);exit; foreach ($list as $item) { dump("当前 up名称是: " . $item["up_name"] . "\n"); if ($item["is_downloaded"] == 1) { @@ -656,7 +660,7 @@ class BilibiliServiceV2 public function queryForVideoParts() { $i = 1; - $list = BilibiliVideos::orderBy('id', 'desc')->simplePaginate(200, null, 'page', $i); + $list = BilibiliVideos::orderBy('id', 'desc')->simplePaginate(2000, null, 'page', $i); // dump($list->items()[0]->aid); while ($list->isNotEmpty()) { foreach ($list->items() as $item) { @@ -972,8 +976,9 @@ done && echo "ok"'); $downloadResult = shell_exec('cd "' . $innerDir . '" && url="https://www.bilibili.com/video/av' . $aid . '?p=" for i in $(seq 1 ' . $parts . ') do -annie -c "SESSDATA=94247a4e%2C1651981649%2C1dba1%2Ab1;" $url$i +lux -c "SESSDATA=94247a4e%2C1651981649%2C1dba1%2Ab1;" $url$i done && echo "ok"'); +// } Log::info($downloadResult); Log::info("$aid current download result: " . $downloadResult); @@ -1055,7 +1060,7 @@ done && echo "ok"'); dump($list); } - public function checkDiskSpace($dir = "/data") + public function checkDiskSpace($dir = "/Volumes/Crucial X6/Video/bilibili/") { if (disk_free_space($dir) > 5 * 1024 * 1024 * 1024) { return true; diff --git a/app/Services/FfmpegService.php b/app/Services/FfmpegService.php index 88afe96..493b642 100644 --- a/app/Services/FfmpegService.php +++ b/app/Services/FfmpegService.php @@ -119,11 +119,13 @@ class FfmpegService Log::info("in uneed: " . $fileInfo["filename"]); return; } - if (!$this->checkFileSize($pathFile)) { + if (Redis::sismember("sizeSmall", $fileInfo["filename"]) || !$this->checkFileSize($pathFile)) { + Redis::sadd("sizeSmall", $fileInfo["filename"]); Log::info("filesize: " . $fileInfo["filename"]); return; } - if ($this->checkFileEncodeType($pathFile)) { + if (Redis::sismember("hasEncode", $fileInfo["filename"]) || $this->checkFileEncodeType($pathFile)) { + Redis::sadd("hasEncode", $fileInfo["filename"]); Log::info("$pathFile has already encode by h265 return"); return; } @@ -140,6 +142,7 @@ class FfmpegService } $targetFile = $fileInfo["dirname"] . '/' .$fileInfo["filename"] . '-x265'. '.' . $fileInfo["extension"]; if (is_file($targetFile)) { + Log::info("$targetFile is exists"); unlink($pathFile); rename($targetFile, $pathFile); return; @@ -147,8 +150,8 @@ class FfmpegService dump("targetFile", [$targetFile]); Log::info("process target file : $targetFile"); $result = shell_exec("ffmpeg -threads 4 -i ". escapeshellarg($pathFile) ." -preset ultrafast -c:v libx265 -vtag hvc1 " . escapeshellarg($targetFile) . " && echo 'ok'"); - echo $result; - return; + // echo $result; + // return; if (trim($result) == "ok") { echo "compress work done remove the file \n"; Log::info("compress work done remove the file"); @@ -253,6 +256,7 @@ class FfmpegService public function checkFileSize($file, $size = 1): bool { + if (is_file($file) && filesize($file) > 1 * 1024 * 1024) { return true; } @@ -270,7 +274,8 @@ class FfmpegService ->videos() // filters video streams ->first() // returns the first video stream ->get('codec_name'); - } catch (\Exception $e) { + } catch (\Throwable $e) { + echo "error $file \n"; Log::error("ffprobe has error just return false for test, exception: ". $e->getMessage()); return false; } diff --git a/app/Services/InstagramService.php b/app/Services/InstagramService.php index 2898909..79ea2af 100644 --- a/app/Services/InstagramService.php +++ b/app/Services/InstagramService.php @@ -152,6 +152,11 @@ class InstagramService if ($pos > 0) { $filename = substr($filename, 0, $pos); } + if (file_exists($filePrefix . $filename) && $fileNamePrefix != null) { + rename($filePrefix . $filename, $filePrefix . $fileNamePrefix . $filename); + echo "\n file exists and has rename to " . $filePrefix . $fileNamePrefix . $filename; + return 0; + } $filename = $fileNamePrefix . $filename; // if ($filename == "33020038_640464766303508_27725890796388352_n.jpg"){ // $flag = 1; @@ -239,17 +244,18 @@ class InstagramService $response = $ig->media->getLikedFeed(); foreach ($response->getItems() as $item) { //echo json_encode($response->getItems());exit; + $userName = $item->getUser()->getUsername() . "_"; switch ($item->getMediaType()) { case Item::PHOTO: $imageUrl = $item->getImageVersions2()->getCandidates()[0]->getUrl(); - $res = $this->downloadFile($imageUrl, 0, $baseImageDir); + $res = $this->downloadFile($imageUrl, 0, $baseImageDir, $userName); if ($res == 0) { return; } break; case Item::VIDEO: $videoUrl = $item->getVideoVersions()[0]->getUrl(); - $res = $this->downloadFile($videoUrl, 0, $baseImageDir); + $res = $this->downloadFile($videoUrl, 0, $baseImageDir, $userName); if ($res == 0) { return; } @@ -259,7 +265,7 @@ class InstagramService // exit; foreach ($item->getCarouselMedia() as $imageItem) { $imageUrl = $imageItem->getImageVersions2()->getCandidates()[0]->getUrl(); - $res = $this->downloadFile($imageUrl, 0, $baseImageDir); + $res = $this->downloadFile($imageUrl, 0, $baseImageDir, $userName); if ($res == 0) { return; } @@ -287,6 +293,91 @@ class InstagramService } } + public function scrapeFeeds() + { + $ig = new Instagram($this->debug, $this->truncatedDebug); + + try { + $ig->login($this->username, $this->password); + } catch + (\Exception $e) { + echo 'Something went wrong: ' . $e->getMessage() . "\n"; + exit(0); + } + $count = 0; + + $baseImageDir = "/Users/shixuesen/OneDrive/Pictures/instagram/Likes/"; + try { + $maxId = null; + do { + $response = $ig->timeline->getTimelineFeed($maxId); + foreach ($response->getFeedItems() as $item) { + if ($item->getMediaOrAd() == null || $item->getMediaOrAd()->getProductType() == "ad") { + continue; + } +// else{ +// echo json_encode($item->getMediaOrAd()) . "\n"; +// echo "product type: " . $item->getMediaOrAd()->getProductType(). "\n"; +// echo $item->getMediaOrAd()->getUser()->getUsername() . "\n"; +// echo "ad id "; +// echo $item->getMediaOrAd()->getAdId() ; +// echo "\n"; +// echo "is add4ad ". $item->isAd4ad() . "\n"; +// echo "is add link type" . $item->isAdLinkType() . "\n"; +// echo "is media or ad" . $item->isMediaOrAd() . "\n"; +// } +// if () + $userName = $item->getMediaOrAd()->getUser()->getUsername() . "_"; + + switch ($item->getMediaOrAd()->getMediaType()) { + case Item::PHOTO: + $imageUrl = $item->getMediaOrAd()->getImageVersions2()->getCandidates()[0]->getUrl(); + $res = $this->downloadFile($imageUrl, 0, $baseImageDir, $userName); + // if ($res == 0) { + // return; + // } + break; + case Item::VIDEO: + $videoUrl = $item->getMediaOrAd()->getVideoVersions()[0]->getUrl(); + $res = $this->downloadFile($videoUrl, 0, $baseImageDir, $userName); + // if ($res == 0) { + // return; + // } + break; + case Item::CAROUSEL: + foreach ($item->getMediaOrAd()->getCarouselMedia() as $imageItem) { + $imageUrl = $imageItem->getImageVersions2()->getCandidates()[0]->getUrl(); + $res = $this->downloadFile($imageUrl, 0, $baseImageDir, $userName); + // if ($res == 0) { + // return; + // } + } + break; + } + $count++; + if ($count > 200) { + return; + } + + } + // Now we must update the maxId variable to the "next page". + // This will be a null value again when we've reached the last page! + // And we will stop looping through pages as soon as maxId becomes null. + $maxId = $response->getNextMaxId(); + echo "\n new maxId: " . $maxId . "\n"; + + // Sleep for 5 seconds before requesting the next page. This is just an + // example of an okay sleep time. It is very important that your scripts + // always pause between requests that may run very rapidly, otherwise + // Instagram will throttle you temporarily for abusing their API! + echo "\n Sleeping for 5s...\n"; + sleep(5 * random_int(1, 10)); + } while ($maxId != null); + } catch (\Exception $e) { + echo 'Something went wrong: ' . $e->getMessage() . "\n"; + } + } + public function scrapeUsers($start = 0) { $ig = new Instagram($this->debug, $this->truncatedDebug); diff --git a/app/Services/NewNvshenService.php b/app/Services/NewNvshenService.php index 54c95a4..41f6179 100644 --- a/app/Services/NewNvshenService.php +++ b/app/Services/NewNvshenService.php @@ -13,6 +13,7 @@ use Illuminate\Support\Facades\Storage; class NewNvshenService { + // 夏西cici 28139 private static $name_dir = [ "周韦彤" => [ @@ -191,6 +192,7 @@ class NewNvshenService $page = $mainQl->get($baseUrl . $album, [], ['maxTry' => 5])->find(".albumInfo > span")->htmls(); $title = $mainQl->get($baseUrl . $album, [], ['maxTry' => 5])->find(".albumTitle > #htilte")->htmls(); $imageSource = $mainQl->get($baseUrl . $album, [], ['maxTry' => 5])->find("#hgallery img:nth-child(1)")->attr("src"); + dump($imageSource);exit; $imageSourceParts = pathinfo($imageSource, PATHINFO_DIRNAME); break; } catch (ConnectException $e) { diff --git a/app/Services/RenameService.php b/app/Services/RenameService.php index f6a88a4..ea7badb 100644 --- a/app/Services/RenameService.php +++ b/app/Services/RenameService.php @@ -131,23 +131,35 @@ class RenameService public function splitCustomSizeOfFolder($dir = "", $prefix = "", $size = 500) { $files = $this->recordAllFiles($dir, $prefix); + asort($files, SORT_NUMERIC); + $files = array_keys($files); $allFileNum = count($files); $folderNum = ceil(count($files) / $size); - for ($i=0; $i < $folderNum; $i++) { + for ($i=0; $i < $folderNum; $i++) { # code... $currentDirName = $dir . DIRECTORY_SEPARATOR . $prefix . "_00" .$i; if (!is_dir($currentDirName)) { mkdir($currentDirName); } - for ($j=0 + $i * $size; $j < ($i + 1) * $size && $j < $allFileNum; $j++) { + for ($j=0 + $i * $size; $j < ($i + 1) * $size && $j < $allFileNum; $j++) { $fileInfo = pathinfo($files[$j]); - if (is_file($currentDirName . DIRECTORY_SEPARATOR . $fileInfo['basename'])) { - echo "file " . $currentDirName . DIRECTORY_SEPARATOR . $fileInfo['basename'] . " already exists\n"; - echo "now rename {$files[$j]} to " . $currentDirName . DIRECTORY_SEPARATOR . $fileInfo['filename'] . "_1." . $fileInfo['extension'] . "\n"; - rename($files[$j], $currentDirName . DIRECTORY_SEPARATOR . $fileInfo['filename'] . "_1." . $fileInfo['extension']); - } else{ - rename($files[$j], $currentDirName . DIRECTORY_SEPARATOR . $fileInfo['basename']); + $newFileName = $currentDirName . DIRECTORY_SEPARATOR . $fileInfo['filename']; + $t = 0; + $trueNewFileName = $newFileName . "." . $fileInfo["extension"]; + + while (is_file($trueNewFileName)) { + echo "file " . $trueNewFileName . " already exists\n"; + $trueNewFileName = $newFileName . "_" . $t++ . "." . $fileInfo["extension"]; + echo "now rename {$files[$j]} to " . $trueNewFileName . "\n"; } + rename($files[$j], $trueNewFileName); +// if (is_file($currentDirName . DIRECTORY_SEPARATOR . $fileInfo['basename'])) { +// echo "file " . $currentDirName . DIRECTORY_SEPARATOR . $fileInfo['basename'] . " already exists\n"; +// echo "now rename {$files[$j]} to " . $currentDirName . DIRECTORY_SEPARATOR . $fileInfo['filename'] . "_1." . $fileInfo['extension'] . "\n"; +// rename($files[$j], $currentDirName . DIRECTORY_SEPARATOR . $fileInfo['filename'] . "_1." . $fileInfo['extension']); +// } else{ +// rename($files[$j], $currentDirName . DIRECTORY_SEPARATOR . $fileInfo['basename']); +// } } } } @@ -164,7 +176,8 @@ class RenameService $trueFiles = array_merge($trueFiles, $this->recordAllFiles($dir . DIRECTORY_SEPARATOR . $file, $prefix)); } if (is_file($dir . DIRECTORY_SEPARATOR . $file)) { - $trueFiles[] = $dir . DIRECTORY_SEPARATOR . $file; + $filetime = filectime($dir . DIRECTORY_SEPARATOR . $file); + $trueFiles[$dir . DIRECTORY_SEPARATOR . $file] = $filetime; } } } diff --git a/app/Services/WeiboService.php b/app/Services/WeiboService.php index f827d9b..54d1ff4 100644 --- a/app/Services/WeiboService.php +++ b/app/Services/WeiboService.php @@ -1,6 +1,7 @@ files = array_merge($this->files, scanFilesWithoutPath($this->imageDir)); + $this->files = array_merge($this->files, scanFilesWithoutPath($this->videoDir)); + } public function scrapeWeiboPicAndVideo($content) { @@ -113,8 +123,10 @@ class WeiboService public function scrapeGroupWeiboPicAndVideo($content) { - $video_dir = "/Volumes/Samsung/weibo/video"; - $image_dir = "/Volumes/Samsung/weibo/image"; + // $video_dir = "/Volumes/Samsung/weibo/video"; + // $image_dir = "/Volumes/Samsung/weibo/image"; + $video_dir = "/Volumes/Crucial X6/Image/weibo/video/"; + $image_dir = "/Volumes/Crucial X6/Image/weibo/image/"; try { if (strlen($content) > 0) { @@ -203,10 +215,10 @@ class WeiboService return ((float)$usec + (float)$sec); } - function process_pic($pics, $subdir, $user, $text) + function process_pic($pics, $subDir, $user, $text) { - if (!file_exists($subdir)) { - mkdir($subdir); + if (!file_exists($subDir)) { + mkdir($subDir); } $h2w = 0; foreach ($pics as $pic) { @@ -224,17 +236,21 @@ class WeiboService if ($h2w > 15) { continue; } - $pic_name = pathinfo($pic_url, PATHINFO_FILENAME); - $pic_ext = pathinfo($pic_url, PATHINFO_EXTENSION); + $picName = pathinfo($pic_url, PATHINFO_FILENAME); + $picExt = pathinfo($pic_url, PATHINFO_EXTENSION); $user_name = Arr::get($user, "screen_name", ''); if ($user_name != '') { - $pic_name = $user_name . '--' .$pic_name; + $picName = $user_name . '--' .$picName; } - $file_name = $subdir . "/" . $pic_name . "." . $pic_ext; - if (!file_exists($file_name)) { + $file_name = $subDir . DIRECTORY_SEPARATOR . $picName . "." . $picExt; + $baseName = $picName . "." . $picExt; + if (!file_exists($file_name) && !$this->checkFileHasDownload($baseName)) { $pic_content = file_get_contents($pic_url); // echo $pic_content; file_put_contents($file_name, $pic_content); + $this->files[] = $baseName; + } else { + Log::info("$baseName file exists"); } } } @@ -261,10 +277,27 @@ class WeiboService // $video_origin_name = pathinfo($video_url, PATHINFO_FILENAME); // $video_ext = pathinfo($video_url, PATHINFO_EXTENSION); $file_name = $subdir . "/" . $video_name . "--" . $video_origin_name; - if (!file_exists($file_name)) { + $baseName = $video_name . "--" . $video_origin_name; + if (!file_exists($file_name) && !$this->checkFileHasDownload($baseName)) { # code... $video_content = file_get_contents($video_url); file_put_contents($file_name, $video_content); + $this->files[] = $baseName; + } else { + Log::info("$baseName file exists"); + } + } + + private function checkFileHasDownload($fileName) + { + if (in_array($fileName, $this->files)) { + Log::info("$fileName exist in local files"); + return true; + } + $record = ImageRecord::where("name", $fileName)->first(); + if ($record != null) { + Log::info("$fileName exist in db"); + return true; } } diff --git a/app/Services/XiaoyuService.php b/app/Services/XiaoyuService.php new file mode 100644 index 0000000..b34f2bf --- /dev/null +++ b/app/Services/XiaoyuService.php @@ -0,0 +1,256 @@ +queryInstance = QueryList::getInstance(); + $this->queryNew = new QueryList(); + } + + public function scrapeAlbum() + { + $pageSize = 20; + $pageCount = $this->getEncodeHtmlContent("https://www.xiurenb.net/XiaoYu/index.html")->find(".page span strong")->htmls()->get(0); + print_r($pageCount); + if ((int)$pageCount > 0) { + $pageCount = 20; + for ($i = 0; $i <= ceil($pageCount / $pageSize); $i++) { + $urlSuffix = ""; + if ($i == 0) { + $urlSuffix = "index.html"; + } else { + $urlSuffix = "index" . $i . ".html"; + } + $this->scrapePageAlbum($this->xiurenRootUrl . $urlSuffix); + } + } + } + + public function scrapePageAlbum($url) + { + + $pageContent = $this->getEncodeHtmlContent($url); + $items = $pageContent->find(".i_list a")->getElements(); + $i = 0; + foreach ($items as $item) { + // $i++; + // if ($i < 11) { + // continue; + // # code... + // } + dump($item->getAttribute("href")); + $this->scrapeSingleAlbum($this->domainUrl . $item->getAttribute("href")); + } + } + + public function scrapeSingleAlbum($url) + { + Log::info("scrapeSingleAlbum $url"); + $pageContent = $this->getEncodeHtmlContent($url); + $albumName = $pageContent->find(".item_title h1")->htmls()->get(0); + $pageItems = $pageContent->find(".content:eq(0) .page a")->attrs("href")->all(); + if ($this->checkAlbumHasDownload($albumName)) { + Log::info("已经下载过了,相册名:" . $albumName); + return; + } + $imageNo = 1; + $this->parseContent($this->rootDir . $albumName, $pageContent, $imageNo); + $pageItems = array_slice($pageItems, 2, count($pageItems) - 3); + foreach ($pageItems as $item) { + $pageContent = $this->getEncodeHtmlContent($this->domainUrl . $item); + $this->parseContent($this->rootDir . $albumName, $pageContent, $imageNo); + } + } + + public function parseContent($dir, $pageContent, &$imageNo) + { + if (!is_dir($dir)) { + try { + mkdir($dir); + } catch (Exception $e) { + Log::error($e->getMessage()); + return; + } + } + $images = $pageContent->find(".content p img")->getElements(); + $user = $pageContent->find(".item_info div a:eq(-1) span")->htmls()->get(0); + dump("user is " . $user); + foreach ($images as $image) { + usleep(random_int(10, 100) * 100); + $imageUrl = $image->getAttribute("src"); + $trueImageUrl = "https://www.xiurenji.net" . $imageUrl; + $fileInfo = pathinfo($trueImageUrl); + if (file_exists($dir . DIRECTORY_SEPARATOR . $fileInfo["basename"])) { + rename($dir . DIRECTORY_SEPARATOR . $fileInfo["basename"], $dir . DIRECTORY_SEPARATOR . $imageNo . "-" . $fileInfo["basename"]); + $imageNo++; + continue; + } + if (file_exists($dir . DIRECTORY_SEPARATOR . $imageNo . "-" . $fileInfo["basename"])) { + rename($dir . DIRECTORY_SEPARATOR . $imageNo . "-" . $fileInfo["basename"], $dir . DIRECTORY_SEPARATOR . trim($user) . "-" . $imageNo . "-" . $fileInfo["basename"]); + $imageNo++; + continue; + } + if (file_exists($dir . DIRECTORY_SEPARATOR . trim($user) . "-" . $imageNo . "-" . $fileInfo["basename"])) { + $imageNo++; + continue; + } + dump($fileInfo); + $attempts = 0; + $content = ""; + do { + try { + $curl_handle = curl_init(); + curl_setopt($curl_handle, CURLOPT_URL, $trueImageUrl); + curl_setopt($curl_handle, CURLOPT_CONNECTTIMEOUT, 20000); + curl_setopt($curl_handle, CURLOPT_RETURNTRANSFER, 1); + curl_setopt($curl_handle, CURLOPT_USERAGENT, 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.109 Safari/537.36'); + curl_setopt($curl_handle, CURLOPT_REFERER, $this->xiurenRootUrl); + curl_setopt($curl_handle, CURLOPT_FOLLOWLOCATION, true); + curl_setopt($curl_handle, CURLOPT_ENCODING, ''); + curl_setopt($curl_handle, CURLOPT_MAXREDIRS, 10); + curl_setopt($curl_handle, CURLOPT_TIMEOUT, 0); + curl_setopt($curl_handle, CURLOPT_HTTP_VERSION, CURL_HTTP_VERSION_1_1); + curl_setopt($curl_handle, CURLOPT_CUSTOMREQUEST, 'GET'); + curl_setopt($curl_handle, CURLOPT_HTTPHEADER, array( + 'authority: www.xiurenji.net', + 'pragma: no-cache', + 'cache-control: no-cache', + 'sec-ch-ua: "Google Chrome";v="95", "Chromium";v="95", ";Not A Brand";v="99"', + 'sec-ch-ua-mobile: ?0', + 'user-agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/95.0.4638.69 Safari/537.36', + 'sec-ch-ua-platform: "macOS"', + 'accept: image/avif,image/webp,image/apng,image/svg+xml,image/*,*/*;q=0.8', + 'sec-fetch-site: same-origin', + 'sec-fetch-mode: no-cors', + 'sec-fetch-dest: image', + 'referer: https://www.xiurenji.net/XiuRen/9483.html', + 'accept-language: zh-CN,zh;q=0.9', + 'cookie: UM_distinctid=17cfa8bea8eb9e-0dd0c6d032d0fc-1c306851-13c680-17cfa8bea8fc85; CNZZDATA1278618868=1505121253-1636283360-%7C1636283360; __51cke__=; ASPSESSIONIDQAQAATSQ=LBLGNPMDHKKMNOPDBCEAPIMH; __tins__20641871=%7B%22sid%22%3A%201636291046220%2C%20%22vd%22%3A%202%2C%20%22expires%22%3A%201636292852634%7D; __51laig__=2' + )); + $content = curl_exec($curl_handle); + if ($content === false) { + $le = new Exception("get image has error: " . curl_error($curl_handle)); + curl_close($curl_handle); + throw $le; + } + curl_close($curl_handle); + // $content = file_get_contents($trueImageUrl); + } catch (ErrorException|Exception $e) { + echo $e->getTraceAsString() . "\n"; + $sleepTime = 10000 * random_int(100, 1000); + echo "wait for $trueImageUrl sleep {$sleepTime} nano second \n"; + usleep($sleepTime); + $attempts++; + continue; + } + break; + } while ($attempts < 100); + if ($content != "") { + file_put_contents($dir . DIRECTORY_SEPARATOR . trim($user) . "-" . $imageNo . "-" . $fileInfo["basename"], $content); + } else { + Log::error("image content is empty " . $trueImageUrl); + } + $imageNo++; + // dump("current imageNo: " . $imageNo); + } + } + + public function getEncodeHtmlContent($url) + { + $attempts = 0; + $html = ""; + $arrContextOptions = array( + "ssl" => array( + "allow_self_signed" => true, + "verify_peer" => false, + "verify_peer_name" => false, + ), + ); + do { + try { + $curl = curl_init(); + + curl_setopt_array($curl, array( + CURLOPT_URL => $url, + CURLOPT_RETURNTRANSFER => true, + CURLOPT_ENCODING => 'UTF-8', + CURLOPT_MAXREDIRS => 10, + CURLOPT_TIMEOUT => 0, + CURLOPT_FOLLOWLOCATION => true, + CURLOPT_HTTP_VERSION => CURL_HTTP_VERSION_1_1, + CURLOPT_CUSTOMREQUEST => 'GET', + CURLOPT_SSL_VERIFYPEER => false, + CURLOPT_SSL_VERIFYHOST => false, + + CURLOPT_HTTPHEADER => array( + 'authority: www.xiurenji.net', + 'pragma: no-cache', + 'cache-control: no-cache', + 'sec-ch-ua: " Not;A Brand";v="99", "Google Chrome";v="91", "Chromium";v="91"', + 'sec-ch-ua-mobile: ?0', + 'upgrade-insecure-requests: 1', + 'user-agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.114 Safari/537.36', + 'accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9', + 'sec-fetch-site: same-origin', + 'sec-fetch-mode: navigate', + 'sec-fetch-user: ?1', + 'sec-fetch-dest: document', + 'referer: https://www.xiurenji.net/XiuRen/', + 'accept-language: zh-CN,zh;q=0.9', + 'cookie: UM_distinctid=177fd93a0ca93c-06b94658d5d337-121a4759-13c680-177fd93a0cbcaf; ASPSESSIONIDCATDQACD=FDPMPCLAMHNCPJFCBLKFLCKH; CNZZDATA1278618868=367774893-1614867004-%7C1625926983; __51cke__=; __tins__20641871=%7B%22sid%22%3A%201625931982756%2C%20%22vd%22%3A%203%2C%20%22expires%22%3A%201625933829110%7D; __51laig__=7' + ), + )); + + $response = curl_exec($curl); + $error = curl_error($curl); + echo $error; + curl_close($curl); + + // echo $response; + // $html = iconv('gb2312','UTF-8//IGNORE', $response); + $html = iconv_gbk_to_uft8($response); + $this->queryNew->setHtml($response); + // $html = $response; + } catch (Exception $e) { + echo $e->getMessage() . "\n"; + echo $e->getTraceAsString() . "\n"; + $sleepTime = 10000 * random_int(100, 1000); + echo "sleep {$sleepTime} nano second \n"; + usleep($sleepTime); + $attempts++; + continue; + } + break; + } while ($attempts < 100); + dump("current url: " . $url); + return $this->queryInstance->setHtml($html); + } + + + private function checkAlbumHasDownload($albumName) + { + $record = ImageRecord::where("name", $albumName)->first(); + if ($record != null) { + return true; + } + } +} diff --git a/app/Services/XiurenjiService.php b/app/Services/XiurenjiService.php index fb46801..df91a32 100644 --- a/app/Services/XiurenjiService.php +++ b/app/Services/XiurenjiService.php @@ -4,6 +4,7 @@ namespace App\Services; +use App\ImageRecord; use Exception; use ErrorException; use Log; @@ -11,97 +12,124 @@ use QL\QueryList; class XiurenjiService { - public $domainUrl = "https://www.xiurenji.net"; - public $xiurenRootUrl = "https://www.xiurenji.net/XiuRen/"; -// public $rootDir = "/Users/shixuesen/Documents/tmp/xiuren/"; -// public $rootDir = "/Volumes/Backup/images/xiuren/"; - public $rootDir = "/Volumes/intel660p/image/xiuren/"; + private $name_dir = [ + "xiuren" => [ + "path" => "XiuRen/", + "dir" => "xiuren/" + ], + "xiaoyu" => [ + "path" => "XiaoYu/", + "dir" => "xiaoyu/" + ], + "youwu" => [ + "path" => "YouWu/", + "dir" => "youwu/" + ], + "mygirl" => [ + "path" => "MyGirl/", + "dir" => "mygirl/" + ], + "huayang" => [ + "path" => "HuaYang/", + "dir" => "huayang/" + ], + "mfstar" => [ + "path" => "MFStar/", + "dir" => "mfstar/" + ], + "imiss" => [ + "path" => "IMiss/", + "dir" => "imiss/" + ] + + ]; + public $domainUrl = "https://www.xiurenb.net/"; + public $xiurenRootUrl = "https://www.xiurenb.net/XiuRen/"; + // public $rootDir = "/Users/shixuesen/Documents/tmp/xiuren/"; + // public $rootDir = "/Volumes/Backup/images/xiuren/"; + public $rootDir = "/Volumes/Crucial X6/Image/xr/"; public $queryInstance; public $queryNew; + public function __construct() { $this->queryInstance = QueryList::getInstance(); $this->queryNew = new QueryList(); } - public function scrapeAlbum() + public function scrapeAll() { + foreach ($this->name_dir as $key => $value) { + # code... + dump("current site: " . $key); + $this->scrapeAlbum($key, 20); + } + // $this->scrapeAlbum("xiuren", 20); + // $this->scrapeAlbum("xiaoyu", 20); + // $this->scrapeAlbum("youwu", 20); + // $this->scrapeAlbum("mygirl", 20); + // $this->scrapeAlbum("huayang", 20); + // $this->scrapeAlbum("mfstar", 20); + // $this->scrapeAlbum("imiss", 20); + } + + public function scrapeAlbum($path, $num = 20, $startPage = 0) { $pageSize = 20; - $pageCount = $this->getEncodeHtmlContent("https://www.xiurenji.net/XiuRen/index.html")->find(".page span")->htmls()->get(0); - print_r($pageCount); + $urlPath = $this->name_dir[$path]["path"]; + $rootDir = $this->rootDir; + $this->rootDir = $this->rootDir . $this->name_dir[$path]["dir"]; + $pageCount = $this->getEncodeHtmlContent("https://www.xiurenb.net/$urlPath/index.html")->find(".page span strong")->htmls()->get(0); + dump("current site item count: " . $pageCount); if ((int)$pageCount > 0) { - $pageCount = 100; - for ($i = 0; $i <= ceil($pageCount / $pageSize); $i++) { + $pageCount = min($pageCount, $num); + for ($i = $startPage; $i <= ceil($pageCount / $pageSize); $i++) { $urlSuffix = ""; if ($i == 0) { $urlSuffix = "index.html"; } else { $urlSuffix = "index" . $i . ".html"; } - $this->scrapePageAlbum($this->xiurenRootUrl . $urlSuffix); -// exit; + $this->scrapePageAlbum($this->domainUrl . $urlPath . $urlSuffix); } } - + $this->rootDir = $rootDir; } - public function scrapePageAlbum($url) { + public function scrapePageAlbum($url) + { $pageContent = $this->getEncodeHtmlContent($url); -// dump($pageContent); - $items = $pageContent->find(".dan a")->getElements(); + $items = $pageContent->find(".i_list a")->getElements(); + $i = 0; foreach ($items as $item) { + // $i++; + // if ($i < 18) { + // continue; + // # code... + // } dump($item->getAttribute("href")); $this->scrapeSingleAlbum($this->domainUrl . $item->getAttribute("href")); } - } - public function scrapeSingleAlbum($url) { + public function scrapeSingleAlbum($url) + { Log::info("scrapeSingleAlbum $url"); $pageContent = $this->getEncodeHtmlContent($url); - $pageSize = 3; - $items = $pageContent->find(".ina p:nth-child(2)")->texts(); - $pageItems = $pageContent->find(".page a:eq(-2)")->htmls(); - $isSinglePage = false; - $pageCount = 0; - if (count($pageItems) <= 0) { - $isSinglePage = true; - $pageCount = 1; -// dump("this album is error: ". $url); -// Log::error("this album is error: " . $url); -// return; + $albumName = $pageContent->find(".item_title h1")->htmls()->get(0); + $pageItems = $pageContent->find(".content:eq(0) .page a")->attrs("href")->all(); + if ($this->checkAlbumHasDownload($albumName)) { + Log::info("已经下载过了,相册名:" . $albumName); + return; + } + dump("当前相册名: " . $albumName); + $imageNo = 1; + $this->parseContent($this->rootDir . $albumName, $pageContent, $imageNo); + $pageItems = array_slice($pageItems, 2, count($pageItems) - 3); + foreach ($pageItems as $item) { + $pageContent = $this->getEncodeHtmlContent($this->domainUrl . $item); + $this->parseContent($this->rootDir . $albumName, $pageContent, $imageNo); } -// dump($pageItems);exit; - $item = $items[0]; -// $imageNum = substr($item, strrpos($item, "[") + 1, strrpos($item, "P]") - strrpos($item, "[") - 1); -// $pageCount = ceil($imageNum / $pageSize); - $pageCount = $pageCount > 0 ? $pageCount : (int)$pageItems[0]; -// dump("pageCount: ". $pageCount . "");exit; - $slashPos = strpos($url, "XiuRen/") + 7; - $dotPos = strrpos($url, "."); - $albumCode = substr($url, $slashPos, $dotPos - $slashPos); -// $albumName0 = substr($item, strpos($item, "["), strrpos($item, "]") - strpos($item, "[") + 1); - $albumName = ltrim(substr($item, 6, strrpos($item, "]") - 5)); - if (mb_strlen($albumName) <= 12 || mb_strlen($albumName) >= 50 || !str_contains($albumName, "No")) { - dump("old Album: " . $albumName); - $albumName = $pageContent->find(".ina p b:nth-child(2)")->texts(); -// $albumName = ltrim(substr($item, 6, strrpos(substr($item, 0, strrpos($item, "@")), " ") - 5)); - dump("new Album: " . urldecode($albumName[0])); - $albumName = urldecode($albumName[0]); - } - $imageNo = 1; - $this->parseContent($this->rootDir. $albumName, $pageContent, $imageNo); - dump("albumName: ". $albumName); -// dump("item: ". $item); -// exit; - for ($i = 1; $i < $pageCount; $i++) { - $pageContent = $this->getEncodeHtmlContent($this->xiurenRootUrl . $albumCode . "_" . $i . ".html"); - $this->parseContent($this->rootDir. $albumName, $pageContent, $imageNo); - } -// dump("slashPos: " . $slashPos . " dotPos: " . $dotPos . " albumCode: ". $albumCode); -// dump($item); -// exit; } public function parseContent($dir, $pageContent, &$imageNo) @@ -114,43 +142,38 @@ class XiurenjiService return; } } - $images = $pageContent->find(".img p img")->getElements(); - $user = $pageContent->find(".title_pc tr:eq(2) td a:eq(2)")->html(); -// $user = $this->queryNew->find(".ina a:eq(-1) b")->html(); - // body > div.nr3 > table.title_pc > tbody > tr:nth-child(3) > td > a:nth-child(2) - $user = iconv_gbk_to_uft8($user); - dump("user is " . $user); - if (str_contains($user, "#") || str_contains($user, "&") || trim($user) == "") { - $user = $this->queryNew->find(".ina a:eq(-1) b")->html();; - $user = iconv_gbk_to_uft8($user); - dump("new user is " . $user); + $images = $pageContent->find(".content p img")->getElements(); + $user = $pageContent->find(".item_info div a:eq(-1) span")->htmls()->get(0); + if ($imageNo == 1) { + // 只在每个相册第一次输出名字 + dump("user is " . $user); } + foreach ($images as $image) { usleep(random_int(10, 100) * 100); $imageUrl = $image->getAttribute("src"); - $trueImageUrl = "https://x1.plmn5.com/U". substr($imageUrl, 2); - $trueImageUrl = "https://www.xiurenji.net" . $imageUrl; + $trueImageUrl = "https://www.xiurenji.net" . $imageUrl; $fileInfo = pathinfo($trueImageUrl); - if (file_exists($dir . "/" .$fileInfo["basename"])) { - rename($dir . "/" . $fileInfo["basename"], $dir . "/" . $imageNo . "-" . $fileInfo["basename"]); + if (file_exists($dir . DIRECTORY_SEPARATOR . $fileInfo["basename"])) { + rename($dir . DIRECTORY_SEPARATOR . $fileInfo["basename"], $dir . DIRECTORY_SEPARATOR . $imageNo . "-" . $fileInfo["basename"]); $imageNo++; continue; } - if (file_exists($dir . "/" . $imageNo . "-" . $fileInfo["basename"])) { - rename($dir . "/" . $imageNo . "-" . $fileInfo["basename"], $dir . "/" . trim($user) . "-" . $imageNo . "-" . $fileInfo["basename"]); + if (file_exists($dir . DIRECTORY_SEPARATOR . $imageNo . "-" . $fileInfo["basename"])) { + rename($dir . DIRECTORY_SEPARATOR . $imageNo . "-" . $fileInfo["basename"], $dir . DIRECTORY_SEPARATOR . trim($user) . "-" . $imageNo . "-" . $fileInfo["basename"]); $imageNo++; continue; } - if (file_exists($dir . "/" . trim($user) . "-" . $imageNo . "-" . $fileInfo["basename"])) { + if (file_exists($dir . DIRECTORY_SEPARATOR . trim($user) . "-" . $imageNo . "-" . $fileInfo["basename"])) { $imageNo++; continue; } - dump($fileInfo); + // dump($fileInfo); $attempts = 0; $content = ""; do { try { - $curl_handle=curl_init(); + $curl_handle = curl_init(); curl_setopt($curl_handle, CURLOPT_URL, $trueImageUrl); curl_setopt($curl_handle, CURLOPT_CONNECTTIMEOUT, 20000); curl_setopt($curl_handle, CURLOPT_RETURNTRANSFER, 1); @@ -162,21 +185,21 @@ class XiurenjiService curl_setopt($curl_handle, CURLOPT_TIMEOUT, 0); curl_setopt($curl_handle, CURLOPT_HTTP_VERSION, CURL_HTTP_VERSION_1_1); curl_setopt($curl_handle, CURLOPT_CUSTOMREQUEST, 'GET'); - curl_setopt($curl_handle, CURLOPT_HTTPHEADER, array( - 'authority: www.xiurenji.net', - 'pragma: no-cache', - 'cache-control: no-cache', - 'sec-ch-ua: "Google Chrome";v="95", "Chromium";v="95", ";Not A Brand";v="99"', - 'sec-ch-ua-mobile: ?0', - 'user-agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/95.0.4638.69 Safari/537.36', - 'sec-ch-ua-platform: "macOS"', - 'accept: image/avif,image/webp,image/apng,image/svg+xml,image/*,*/*;q=0.8', - 'sec-fetch-site: same-origin', - 'sec-fetch-mode: no-cors', - 'sec-fetch-dest: image', - 'referer: https://www.xiurenji.net/XiuRen/9483.html', - 'accept-language: zh-CN,zh;q=0.9', - 'cookie: UM_distinctid=17cfa8bea8eb9e-0dd0c6d032d0fc-1c306851-13c680-17cfa8bea8fc85; CNZZDATA1278618868=1505121253-1636283360-%7C1636283360; __51cke__=; ASPSESSIONIDQAQAATSQ=LBLGNPMDHKKMNOPDBCEAPIMH; __tins__20641871=%7B%22sid%22%3A%201636291046220%2C%20%22vd%22%3A%202%2C%20%22expires%22%3A%201636292852634%7D; __51laig__=2' + curl_setopt($curl_handle, CURLOPT_HTTPHEADER, array( + 'authority: www.xiurenji.net', + 'pragma: no-cache', + 'cache-control: no-cache', + 'sec-ch-ua: "Google Chrome";v="95", "Chromium";v="95", ";Not A Brand";v="99"', + 'sec-ch-ua-mobile: ?0', + 'user-agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/95.0.4638.69 Safari/537.36', + 'sec-ch-ua-platform: "macOS"', + 'accept: image/avif,image/webp,image/apng,image/svg+xml,image/*,*/*;q=0.8', + 'sec-fetch-site: same-origin', + 'sec-fetch-mode: no-cors', + 'sec-fetch-dest: image', + 'referer: https://www.xiurenji.net/XiuRen/9483.html', + 'accept-language: zh-CN,zh;q=0.9', + 'cookie: UM_distinctid=17cfa8bea8eb9e-0dd0c6d032d0fc-1c306851-13c680-17cfa8bea8fc85; CNZZDATA1278618868=1505121253-1636283360-%7C1636283360; __51cke__=; ASPSESSIONIDQAQAATSQ=LBLGNPMDHKKMNOPDBCEAPIMH; __tins__20641871=%7B%22sid%22%3A%201636291046220%2C%20%22vd%22%3A%202%2C%20%22expires%22%3A%201636292852634%7D; __51laig__=2' )); $content = curl_exec($curl_handle); if ($content === false) { @@ -185,36 +208,36 @@ class XiurenjiService throw $le; } curl_close($curl_handle); -// $content = file_get_contents($trueImageUrl); - } catch (ErrorException | Exception $e) { + // $content = file_get_contents($trueImageUrl); + } catch (ErrorException|Exception $e) { echo $e->getTraceAsString() . "\n"; $sleepTime = 10000 * random_int(100, 1000); echo "wait for $trueImageUrl sleep {$sleepTime} nano second \n"; usleep($sleepTime); - $attempts ++; + $attempts++; continue; } break; - } while($attempts < 100); + } while ($attempts < 100); if ($content != "") { - file_put_contents($dir . "/" . trim($user) . "-" . $imageNo . "-" . $fileInfo["basename"], $content); + file_put_contents($dir . DIRECTORY_SEPARATOR . trim($user) . "-" . $imageNo . "-" . $fileInfo["basename"], $content); } else { - Log::error("image content is empty ". $trueImageUrl); + Log::error("image content is empty " . $trueImageUrl); } $imageNo++; -// dump("current imageNo: " . $imageNo); + // dump("current imageNo: " . $imageNo); } - } - public function getEncodeHtmlContent($url) { + public function getEncodeHtmlContent($url) + { $attempts = 0; $html = ""; - $arrContextOptions=array( - "ssl"=>array( - "allow_self_signed"=>true, - "verify_peer"=>false, - "verify_peer_name"=>false, + $arrContextOptions = array( + "ssl" => array( + "allow_self_signed" => true, + "verify_peer" => false, + "verify_peer_name" => false, ), ); do { @@ -233,7 +256,7 @@ class XiurenjiService CURLOPT_SSL_VERIFYPEER => false, CURLOPT_SSL_VERIFYHOST => false, - CURLOPT_HTTPHEADER => array( + CURLOPT_HTTPHEADER => array( 'authority: www.xiurenji.net', 'pragma: no-cache', 'cache-control: no-cache', @@ -257,23 +280,32 @@ class XiurenjiService echo $error; curl_close($curl); -// echo $response; -// $html = iconv('gb2312','UTF-8//IGNORE', $response); + // echo $response; + // $html = iconv('gb2312','UTF-8//IGNORE', $response); $html = iconv_gbk_to_uft8($response); $this->queryNew->setHtml($response); -// $html = $response; + // $html = $response; } catch (Exception $e) { - echo $e->getMessage(). "\n"; + echo $e->getMessage() . "\n"; echo $e->getTraceAsString() . "\n"; $sleepTime = 10000 * random_int(100, 1000); echo "sleep {$sleepTime} nano second \n"; usleep($sleepTime); - $attempts ++; + $attempts++; continue; } break; - } while($attempts < 100); + } while ($attempts < 100); + // dump("current url: " . $url); return $this->queryInstance->setHtml($html); } + + private function checkAlbumHasDownload($albumName) + { + $record = ImageRecord::where("name", $albumName)->first(); + if ($record != null) { + return true; + } + } } diff --git a/app/Services/YouwuService.php b/app/Services/YouwuService.php new file mode 100644 index 0000000..f0f39f8 --- /dev/null +++ b/app/Services/YouwuService.php @@ -0,0 +1,256 @@ +queryInstance = QueryList::getInstance(); + $this->queryNew = new QueryList(); + } + + public function scrapeAlbum() + { + $pageSize = 20; + $pageCount = $this->getEncodeHtmlContent("https://www.xiurenb.net/YouWu/index.html")->find(".page span strong")->htmls()->get(0); + print_r($pageCount); + if ((int)$pageCount > 0) { + // $pageCount = 20; + for ($i = 0; $i <= ceil($pageCount / $pageSize); $i++) { + $urlSuffix = ""; + if ($i == 0) { + $urlSuffix = "index.html"; + } else { + $urlSuffix = "index" . $i . ".html"; + } + $this->scrapePageAlbum($this->xiurenRootUrl . $urlSuffix); + } + } + } + + public function scrapePageAlbum($url) + { + + $pageContent = $this->getEncodeHtmlContent($url); + $items = $pageContent->find(".i_list a")->getElements(); + $i = 0; + foreach ($items as $item) { + // $i++; + // if ($i < 18) { + // continue; + // # code... + // } + dump($item->getAttribute("href")); + $this->scrapeSingleAlbum($this->domainUrl . $item->getAttribute("href")); + } + } + + public function scrapeSingleAlbum($url) + { + Log::info("scrapeSingleAlbum $url"); + $pageContent = $this->getEncodeHtmlContent($url); + $albumName = $pageContent->find(".item_title h1")->htmls()->get(0); + $pageItems = $pageContent->find(".content:eq(0) .page a")->attrs("href")->all(); + if ($this->checkAlbumHasDownload($albumName)) { + Log::info("已经下载过了,相册名:" . $albumName); + return; + } + $imageNo = 1; + $this->parseContent($this->rootDir . $albumName, $pageContent, $imageNo); + $pageItems = array_slice($pageItems, 2, count($pageItems) - 3); + foreach ($pageItems as $item) { + $pageContent = $this->getEncodeHtmlContent($this->domainUrl . $item); + $this->parseContent($this->rootDir . $albumName, $pageContent, $imageNo); + } + } + + public function parseContent($dir, $pageContent, &$imageNo) + { + if (!is_dir($dir)) { + try { + mkdir($dir); + } catch (Exception $e) { + Log::error($e->getMessage()); + return; + } + } + $images = $pageContent->find(".content p img")->getElements(); + $user = $pageContent->find(".item_info div a:eq(-1) span")->htmls()->get(0); + dump("user is " . $user); + foreach ($images as $image) { + usleep(random_int(10, 100) * 100); + $imageUrl = $image->getAttribute("src"); + $trueImageUrl = "https://www.xiurenji.net" . $imageUrl; + $fileInfo = pathinfo($trueImageUrl); + if (file_exists($dir . DIRECTORY_SEPARATOR . $fileInfo["basename"])) { + rename($dir . DIRECTORY_SEPARATOR . $fileInfo["basename"], $dir . DIRECTORY_SEPARATOR . $imageNo . "-" . $fileInfo["basename"]); + $imageNo++; + continue; + } + if (file_exists($dir . DIRECTORY_SEPARATOR . $imageNo . "-" . $fileInfo["basename"])) { + rename($dir . DIRECTORY_SEPARATOR . $imageNo . "-" . $fileInfo["basename"], $dir . DIRECTORY_SEPARATOR . trim($user) . "-" . $imageNo . "-" . $fileInfo["basename"]); + $imageNo++; + continue; + } + if (file_exists($dir . DIRECTORY_SEPARATOR . trim($user) . "-" . $imageNo . "-" . $fileInfo["basename"])) { + $imageNo++; + continue; + } + dump($fileInfo); + $attempts = 0; + $content = ""; + do { + try { + $curl_handle = curl_init(); + curl_setopt($curl_handle, CURLOPT_URL, $trueImageUrl); + curl_setopt($curl_handle, CURLOPT_CONNECTTIMEOUT, 20000); + curl_setopt($curl_handle, CURLOPT_RETURNTRANSFER, 1); + curl_setopt($curl_handle, CURLOPT_USERAGENT, 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.109 Safari/537.36'); + curl_setopt($curl_handle, CURLOPT_REFERER, $this->xiurenRootUrl); + curl_setopt($curl_handle, CURLOPT_FOLLOWLOCATION, true); + curl_setopt($curl_handle, CURLOPT_ENCODING, ''); + curl_setopt($curl_handle, CURLOPT_MAXREDIRS, 10); + curl_setopt($curl_handle, CURLOPT_TIMEOUT, 0); + curl_setopt($curl_handle, CURLOPT_HTTP_VERSION, CURL_HTTP_VERSION_1_1); + curl_setopt($curl_handle, CURLOPT_CUSTOMREQUEST, 'GET'); + curl_setopt($curl_handle, CURLOPT_HTTPHEADER, array( + 'authority: www.xiurenji.net', + 'pragma: no-cache', + 'cache-control: no-cache', + 'sec-ch-ua: "Google Chrome";v="95", "Chromium";v="95", ";Not A Brand";v="99"', + 'sec-ch-ua-mobile: ?0', + 'user-agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/95.0.4638.69 Safari/537.36', + 'sec-ch-ua-platform: "macOS"', + 'accept: image/avif,image/webp,image/apng,image/svg+xml,image/*,*/*;q=0.8', + 'sec-fetch-site: same-origin', + 'sec-fetch-mode: no-cors', + 'sec-fetch-dest: image', + 'referer: https://www.xiurenji.net/XiuRen/9483.html', + 'accept-language: zh-CN,zh;q=0.9', + 'cookie: UM_distinctid=17cfa8bea8eb9e-0dd0c6d032d0fc-1c306851-13c680-17cfa8bea8fc85; CNZZDATA1278618868=1505121253-1636283360-%7C1636283360; __51cke__=; ASPSESSIONIDQAQAATSQ=LBLGNPMDHKKMNOPDBCEAPIMH; __tins__20641871=%7B%22sid%22%3A%201636291046220%2C%20%22vd%22%3A%202%2C%20%22expires%22%3A%201636292852634%7D; __51laig__=2' + )); + $content = curl_exec($curl_handle); + if ($content === false) { + $le = new Exception("get image has error: " . curl_error($curl_handle)); + curl_close($curl_handle); + throw $le; + } + curl_close($curl_handle); + // $content = file_get_contents($trueImageUrl); + } catch (ErrorException|Exception $e) { + echo $e->getTraceAsString() . "\n"; + $sleepTime = 10000 * random_int(100, 1000); + echo "wait for $trueImageUrl sleep {$sleepTime} nano second \n"; + usleep($sleepTime); + $attempts++; + continue; + } + break; + } while ($attempts < 100); + if ($content != "") { + file_put_contents($dir . DIRECTORY_SEPARATOR . trim($user) . "-" . $imageNo . "-" . $fileInfo["basename"], $content); + } else { + Log::error("image content is empty " . $trueImageUrl); + } + $imageNo++; + // dump("current imageNo: " . $imageNo); + } + } + + public function getEncodeHtmlContent($url) + { + $attempts = 0; + $html = ""; + $arrContextOptions = array( + "ssl" => array( + "allow_self_signed" => true, + "verify_peer" => false, + "verify_peer_name" => false, + ), + ); + do { + try { + $curl = curl_init(); + + curl_setopt_array($curl, array( + CURLOPT_URL => $url, + CURLOPT_RETURNTRANSFER => true, + CURLOPT_ENCODING => 'UTF-8', + CURLOPT_MAXREDIRS => 10, + CURLOPT_TIMEOUT => 0, + CURLOPT_FOLLOWLOCATION => true, + CURLOPT_HTTP_VERSION => CURL_HTTP_VERSION_1_1, + CURLOPT_CUSTOMREQUEST => 'GET', + CURLOPT_SSL_VERIFYPEER => false, + CURLOPT_SSL_VERIFYHOST => false, + + CURLOPT_HTTPHEADER => array( + 'authority: www.xiurenji.net', + 'pragma: no-cache', + 'cache-control: no-cache', + 'sec-ch-ua: " Not;A Brand";v="99", "Google Chrome";v="91", "Chromium";v="91"', + 'sec-ch-ua-mobile: ?0', + 'upgrade-insecure-requests: 1', + 'user-agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.114 Safari/537.36', + 'accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9', + 'sec-fetch-site: same-origin', + 'sec-fetch-mode: navigate', + 'sec-fetch-user: ?1', + 'sec-fetch-dest: document', + 'referer: https://www.xiurenji.net/XiuRen/', + 'accept-language: zh-CN,zh;q=0.9', + 'cookie: UM_distinctid=177fd93a0ca93c-06b94658d5d337-121a4759-13c680-177fd93a0cbcaf; ASPSESSIONIDCATDQACD=FDPMPCLAMHNCPJFCBLKFLCKH; CNZZDATA1278618868=367774893-1614867004-%7C1625926983; __51cke__=; __tins__20641871=%7B%22sid%22%3A%201625931982756%2C%20%22vd%22%3A%203%2C%20%22expires%22%3A%201625933829110%7D; __51laig__=7' + ), + )); + + $response = curl_exec($curl); + $error = curl_error($curl); + echo $error; + curl_close($curl); + + // echo $response; + // $html = iconv('gb2312','UTF-8//IGNORE', $response); + $html = iconv_gbk_to_uft8($response); + $this->queryNew->setHtml($response); + // $html = $response; + } catch (Exception $e) { + echo $e->getMessage() . "\n"; + echo $e->getTraceAsString() . "\n"; + $sleepTime = 10000 * random_int(100, 1000); + echo "sleep {$sleepTime} nano second \n"; + usleep($sleepTime); + $attempts++; + continue; + } + break; + } while ($attempts < 100); + dump("current url: " . $url); + return $this->queryInstance->setHtml($html); + } + + + private function checkAlbumHasDownload($albumName) + { + $record = ImageRecord::where("name", $albumName)->first(); + if ($record != null) { + return true; + } + } +} diff --git a/app/Utils/helper.php b/app/Utils/helper.php index c5b2a6d..e5742e4 100644 --- a/app/Utils/helper.php +++ b/app/Utils/helper.php @@ -78,3 +78,23 @@ function iconv_gbk_to_uft8($string){ } + +function scanFilesWithoutPath($path): array +{ + $allFiles = []; + if (is_dir($path)) { + $files = scandir($path); + foreach ($files as $file) { + if ($file == "." || $file == "..") { + continue; + } + if (is_dir($path . DIRECTORY_SEPARATOR . $file)) { + $allFiles = array_merge($allFiles, scanFilesWithoutPath($path . DIRECTORY_SEPARATOR . $file)); + } + if (is_file($path . DIRECTORY_SEPARATOR . $file)) { + $allFiles[] = $file; + } + } + } + return $allFiles; +} diff --git a/fail.log b/fail.log index ba103b1..db34c10 100644 --- a/fail.log +++ b/fail.log @@ -2412,3 +2412,6 @@ /Users/shixuesen/OneDrive/Pictures/instagram/duyenn.hipp/ https://scontent-lax3-2.cdninstagram.com/v/t51.2885-15/e35/271023175_690888875412119_539636855243307136_n.jpg?se=7&_nc_ht=scontent-lax3-2.cdninstagram.com&_nc_cat=111&_nc_ohc=uX8kSCWkRdkAX-ESbX3&edm=ABmJApABAAAA&ccb=7-4&ig_cache_key=Mjc0MjEwNDQ2MjAwMzcxNTUyMg%3D%3D.2-ccb7-4&oh=00_AT9u-NUFO26OzvXmNv82rLjcg3LyNuZn047A8eTYPwvzvA&oe=61D964CA&_nc_sid=6136e7 /Users/shixuesen/OneDrive/Pictures/instagram/duyenn.hipp/ https://scontent-lax3-2.cdninstagram.com/v/t51.2885-15/e35/271200431_406072771311487_3149765346719126659_n.jpg?se=7&_nc_ht=scontent-lax3-2.cdninstagram.com&_nc_cat=107&_nc_ohc=vmY2J-jCQJQAX9G5WJF&edm=ABmJApABAAAA&ccb=7-4&ig_cache_key=Mjc0MjEwNDQ2MTkxMTM3NzM2MQ%3D%3D.2-ccb7-4&oh=00_AT_AWjgpeZtbY2FimwMPXSsJ-8w3-N5N2d6bo8r_PmxaBg&oe=61D9D733&_nc_sid=6136e7 /Users/shixuesen/OneDrive/Pictures/instagram/duyenn.hipp/ https://scontent-lax3-1.cdninstagram.com/v/t51.2885-15/e35/p480x480/271184973_1101596533921696_3101847491200714204_n.jpg?_nc_ht=scontent-lax3-1.cdninstagram.com&_nc_cat=104&_nc_ohc=TOnuDg8jNGoAX9fYYjY&edm=ABmJApABAAAA&ccb=7-4&ig_cache_key=Mjc0MjAwNjE1NzkwMjQ4MjA0MA%3D%3D.2-ccb7-4&oh=00_AT8VY0T2re4YqMfthluidg7dx3aHiyqCWFeHW1js-Sq5RQ&oe=61DA983E&_nc_sid=6136e7 +/Users/shixuesen/OneDrive/Pictures/instagram/amandacerny/ https://scontent-lax3-1.cdninstagram.com/v/t50.2886-16/10000000_458426249278269_2831399203059854387_n.mp4?cb=9ad74b5e-c1c39920&efg=eyJ2ZW5jb2RlX3RhZyI6InZ0c192b2RfdXJsZ2VuLjcyMC5jbGlwcy5iYXNlbGluZSJ9&_nc_ht=scontent-lax3-1.cdninstagram.com&_nc_cat=109&_nc_ohc=Ex7g5lOfRAEAX_8S2xn&edm=ABmJApABAAAA&vs=639577407291135_135919037&_nc_vs=HBksFQAYJEdJQ1dtQUE5Yzc2dDc2QUJBRFBzdE9PU0prc25icV9FQUFBRhUAAsgBABUAGCRHTmNkTnhBQlBMcFdiUW9CQUdIZnhIUnU4MXQwYnFfRUFBQUYVAgLIAQAoABgAGwAVAAAm3ry53ovKyj8VAigCQzMsF0BNHdLxqfvnGBJkYXNoX2Jhc2VsaW5lXzFfdjERAHX%2BBwA%3D&_nc_rid=4bfbf6bc16&ccb=7-4&oe=61EF5C3A&oh=00_AT9EwLhWcU21qFR93E_OXz6-OqMPgCHplrGD5F_Exc6Z8Q&_nc_sid=6136e7 +/Users/shixuesen/OneDrive/Pictures/instagram/parlovetati/ https://scontent-lax3-2.cdninstagram.com/o1/v/t16/f1/m38/3C46336060C0E88EF14A62867DEF31A7_video_dashinit.mp4?efg=eyJ2ZW5jb2RlX3RhZyI6InZ0c192b2RfdXJsZ2VuLjcyMC5zdG9yeS5iYXNlbGluZW9pbCJ9&_nc_ht=scontent-lax3-2.cdninstagram.com&_nc_cat=106&vs=1373815423049090_3319206028&_nc_vs=HBksFQIYRGlnX3hwdl9lcGhlbWVyYWwvM0M0NjMzNjA2MEMwRTg4RUYxNEE2Mjg2N0RFRjMxQTdfdmlkZW9fZGFzaGluaXQubXA0FQACyAEAFQAYJEdQVktQUkFYc1FZRG9lc0FBTEs4UDNaYmcxTlZicGt3QUFBRhUCAsgBACgAGAAbAYgHdXNlX29pbAExFQAAJt6%2B16WA0%2Bo%2FFQIoAkMzLBdABzMzMzMzMxgVZGFzaF9iYXNlbGluZW9pbF8xX3YxEQB16AcA&_nc_rid=d8368805af&cb=9ad74b5e-c1c39920&ccb=9-4&oe=61F1818A&oh=00_AT-vnjS9ihqsdV079FI6YMHhqJ_eosGkWn3E222yC0ijrg&_nc_sid=bab638 +/Users/shixuesen/OneDrive/Pictures/instagram/Likes/ https://scontent-lax3-1.cdninstagram.com/v/t51.2885-15/e35/274209204_124732130092305_602221022598421405_n.jpg?_nc_ht=scontent-lax3-1.cdninstagram.com&_nc_cat=109&_nc_ohc=X96gve0P_9MAX_S7q4S&edm=AJ9x6zYBAAAA&ccb=7-4&ig_cache_key=Mjc3NTExNDIxNTkwMjUxNTAxOQ%3D%3D.2-ccb7-4&oh=00_AT9YuSvddWiOn7hDtogr1XboXsovLzCcnc3zKMNRWe0iBA&oe=6214C58B&_nc_sid=cff2a4