From 53b87f383d38d6f4dcbf1f59dc8b20841442f715 Mon Sep 17 00:00:00 2001 From: shixuesen Date: Fri, 3 Jun 2022 17:26:09 +0800 Subject: [PATCH] feature: fix format query error --- app/Console/Commands/InstagramScrape.php | 34 +++--- app/Services/BilibiliServiceV2.php | 4 + app/Services/FfmpegService.php | 4 +- app/Services/InstagramService.php | 127 +++++++++++++++++++++++ app/Services/NewNvshenService.php | 4 +- fail.log | 13 +++ 6 files changed, 171 insertions(+), 15 deletions(-) diff --git a/app/Console/Commands/InstagramScrape.php b/app/Console/Commands/InstagramScrape.php index 906e6f9..6f1a09a 100644 --- a/app/Console/Commands/InstagramScrape.php +++ b/app/Console/Commands/InstagramScrape.php @@ -5,6 +5,7 @@ namespace App\Console\Commands; use App\Services\Ins24Service; use App\Services\InstagramService; use Illuminate\Console\Command; +use Illuminate\Support\Facades\Redis; class InstagramScrape extends Command { @@ -13,7 +14,7 @@ class InstagramScrape extends Command * * @var string */ - protected $signature = 'ins:like {type} {start}'; + protected $signature = 'ins:like'; /** * The console command description. @@ -39,12 +40,9 @@ class InstagramScrape extends Command */ public function handle() { -// $dir = "/aaa/bbb"; -// $dir = str_replace("/", "", $dir); -// echo $dir;exit; - // - $start = $this->argument('start'); - $type = $this->argument('type'); + +// $start = $this->argument('start'); +// $type = $this->argument('type'); // echo $start;exit; // $userList = file("/Users/shixuesen/OneDrive/Pictures/instagram/user.txt"); // $userList = array_slice($userList, $start, 65); @@ -52,16 +50,28 @@ class InstagramScrape extends Command // print_r($userList);exit; $ins = new InstagramService(); // $ins->getUserNameById('4156629214');exit; - if ($type == "feed") { - # code... + + $ins->scrapeUsersInFile("/Users/shixuesen/OneDrive/Pictures/instagram/user_0.txt");exit; + $lastInsStart = Redis::connection()->get("last_ins_start"); + dump("lastInsStart is $lastInsStart"); + $lastQueryType = Redis::connection()->get("last_query_type"); + dump("lastQueryType is $lastQueryType"); + if ($lastQueryType == null || $lastQueryType == "user") { $ins->scrapeFeeds(); - exit; + Redis::connection()->set("last_query_type", "feed"); } else { - $ins->scrapeUsers($start); + if ($lastInsStart == null) { + $lastInsStart = 0; + } else { + $lastInsStart = 54 - (int)$lastInsStart; + } + Redis::connection()->set("last_ins_start", $lastInsStart); + Redis::connection()->set("last_query_type", "user"); + $ins->scrapeUsers($lastInsStart); $ins->scrapeLikedUsers(); $ins->scrapeCollection(); - exit; } + exit; // $ins->scrapeFeeds();exit; $ins->scrapeUsers($start); $ins->scrapeLikedUsers(); diff --git a/app/Services/BilibiliServiceV2.php b/app/Services/BilibiliServiceV2.php index eef0cdd..06226e3 100644 --- a/app/Services/BilibiliServiceV2.php +++ b/app/Services/BilibiliServiceV2.php @@ -977,6 +977,10 @@ done && echo "ok"'); $formatInfoShellResult = shell_exec($formatInfoShell); $formatInfoList = json_decode($formatInfoShellResult, true); $formatArray = []; + if (!is_array($formatInfoList)) { + Log::error("$aid format info error " . $formatInfoShellResult); + break; + } foreach ($formatInfoList as $formatInfo) { $keys = array_keys($formatInfo["streams"]); $largeHev1FormatCode = -1; diff --git a/app/Services/FfmpegService.php b/app/Services/FfmpegService.php index 68729bf..9934b78 100644 --- a/app/Services/FfmpegService.php +++ b/app/Services/FfmpegService.php @@ -21,6 +21,8 @@ class FfmpegService private $needRemoveExistFiles = false; + const DEFAULT_EXTENSION = "mp4"; + public function __construct() { $this->mediainfo = new MediaInfo(); @@ -167,7 +169,7 @@ class FfmpegService if (ends_with($fileInfo["filename"], "-x265")) { return; } - $targetFile = $fileInfo["dirname"] . '/' .$fileInfo["filename"] . '-x265'. '.' . $fileInfo["extension"]; + $targetFile = $fileInfo["dirname"] . DIRECTORY_SEPARATOR .$fileInfo["filename"] . '-x265'. '.' . self::DEFAULT_EXTENSION; if (is_file($targetFile) && $this->isNeedRemoveExistFiles()) { Log::info("$targetFile is exists"); unlink($pathFile); diff --git a/app/Services/InstagramService.php b/app/Services/InstagramService.php index 5057aa3..052b6de 100644 --- a/app/Services/InstagramService.php +++ b/app/Services/InstagramService.php @@ -549,6 +549,133 @@ class InstagramService } } + public function scrapeUsersInFile($file) + { + + $baseImageDir = "/Users/shixuesen/OneDrive/Pictures/instagram/"; + + try { + + $userList = file($file); + foreach ($userList as $userName) { + $trueName = trim($userName); + $thisUserImageDir = $baseImageDir . $trueName . DIRECTORY_SEPARATOR; + if (!is_dir($thisUserImageDir)) { + mkdir($thisUserImageDir); + } + try { + $userId = $this->ig->people->getUserIdForName(trim($userName)); + } catch (Exception $e) { + // if ($e instanceof UserNotFou) + Log::error("ins get user id for name error: " . $e->getMessage() . " username is " . $userName); + $userId = null; + if (array_key_exists(trim($userName), $this->userList)) { + $userId = Arr::get($userList, trim($userName)); + } + if ($userId == null) { + continue; + } + } + echo "\n username: " . $trueName . " " . $userId . "\n"; + + try { + $response = $this->ig->story->getUserReelMediaFeed($userId); + } catch (Exception $e) { + Log::error("current user has error, $userName, " . $e->getMessage()); + } + foreach ($response->getItems() as $item) { + if (Item::VIDEO == $item->getMediaType()) { + $videoUrl = $item->getVideoVersions()[0]->getUrl(); + $res = $this->downloadFile($videoUrl, 0, $thisUserImageDir); + } + } + $userItemNum = 0; + sleep(5 * random_int(1, 10)); + $maxId = null; + do { + // Request the page corresponding to maxId. + echo "\n current maxId: " . $maxId; + try { + $response = $this->ig->timeline->getUserFeed($userId, $maxId); + } catch (Exception $e) { + Log::error("current user has error, $userName, " . $e->getMessage()); + continue 2; + } + + // In this example we're simply printing the IDs of this page's items. + foreach ($response->getItems() as $item) { + switch ($item->getMediaType()) { + case Item::PHOTO: + $imageUrl = $item->getImageVersions2()->getCandidates()[0]->getUrl(); + $res = $this->downloadFile($imageUrl, 0, $thisUserImageDir); + break; + case Item::VIDEO: + $videoUrl = $item->getVideoVersions()[0]->getUrl(); + $res = $this->downloadFile($videoUrl, 0, $thisUserImageDir); + break; + case Item::CAROUSEL: + foreach ($item->getCarouselMedia() as $imageItem) { + $imageUrl = $imageItem->getImageVersions2()->getCandidates()[0]->getUrl(); + $res = $this->downloadFile($imageUrl, 0, $thisUserImageDir); + } + break; + } + + sleep(5 * random_int(1, 10)); + $userItemNum++; + if ($userItemNum > 500) { + break; + } + } + + // Now we must update the maxId variable to the "next page". + // This will be a null value again when we've reached the last page! + // And we will stop looping through pages as soon as maxId becomes null. + $maxId = $response->getNextMaxId(); + echo "\n new maxId: " . $maxId . "\n"; + + // Sleep for 5 seconds before requesting the next page. This is just an + // example of an okay sleep time. It is very important that your scripts + // always pause between requests that may run very rapidly, otherwise + // Instagram will throttle you temporarily for abusing their API! + echo "\n Sleeping for 5s...\n"; + sleep(10); + } while ($maxId !== null); // Must use "!==" for comparison instead of "!=". + a: + } + // Get the UserPK ID for "natgeo" (National Geographic). + /** + * $userId = $ig->people->getUserIdForName('nyanchan22'); + * + * // Starting at "null" means starting at the first page. + * $maxId = null; + * do { + * // Request the page corresponding to maxId. + * $response = $ig->timeline->getUserFeed($userId, $maxId); + * + * // In this example we're simply printing the IDs of this page's items. + * foreach ($response->getItems() as $item) { + * printf("[%s] https://instagram.com/p/%s/\n", $item->getId(), $item->getCode()); + * } + * + * // Now we must update the maxId variable to the "next page". + * // This will be a null value again when we've reached the last page! + * // And we will stop looping through pages as soon as maxId becomes null. + * $maxId = $response->getNextMaxId(); + * + * // Sleep for 5 seconds before requesting the next page. This is just an + * // example of an okay sleep time. It is very important that your scripts + * // always pause between requests that may run very rapidly, otherwise + * // Instagram will throttle you temporarily for abusing their API! + * echo "Sleeping for 5s...\n"; + * sleep(5); + * } while ($maxId !== null); // Must use "!==" for comparison instead of "!=". + */ + } catch (Exception $e) { + echo 'scrapeUsers something went wrong: ' . $e->getTraceAsString() . "\n"; + } + } + public function scrapeCollection() { diff --git a/app/Services/NewNvshenService.php b/app/Services/NewNvshenService.php index 41f6179..57d6361 100644 --- a/app/Services/NewNvshenService.php +++ b/app/Services/NewNvshenService.php @@ -169,7 +169,7 @@ class NewNvshenService } else { $albumQl = $mainQl->get($baseAlbumUrl . $i . ".html"); } - + } // dump($albumQl->getHtml()); $albumList = $albumQl->find(".igalleryli > .igalleryli_div > .igalleryli_link")->attrs("href"); @@ -192,7 +192,7 @@ class NewNvshenService $page = $mainQl->get($baseUrl . $album, [], ['maxTry' => 5])->find(".albumInfo > span")->htmls(); $title = $mainQl->get($baseUrl . $album, [], ['maxTry' => 5])->find(".albumTitle > #htilte")->htmls(); $imageSource = $mainQl->get($baseUrl . $album, [], ['maxTry' => 5])->find("#hgallery img:nth-child(1)")->attr("src"); - dump($imageSource);exit; + dump($imageSource); $imageSourceParts = pathinfo($imageSource, PATHINFO_DIRNAME); break; } catch (ConnectException $e) { diff --git a/fail.log b/fail.log index 82ac9a8..bfd56a0 100644 --- a/fail.log +++ b/fail.log @@ -2448,3 +2448,16 @@ /Users/shixuesen/OneDrive/Pictures/instagram/limerencem_/ https://scontent-sjc3-1.cdninstagram.com/v/t51.2885-15/280690646_420109199537856_7984527567121575817_n.jpg?se=7&stp=dst-jpg_e35&_nc_ht=scontent-sjc3-1.cdninstagram.com&_nc_cat=100&_nc_ohc=MDdTeJadha4AX8oxxz5&edm=ABmJApABAAAA&ccb=7-4&ig_cache_key=MjgzNjYwOTY5MjMyODA5NzM4NQ%3D%3D.2-ccb7-4&oh=00_AT8YPHu1oqtmBmUyYfIBYvG27wVJaybjNXT0gE7aaJQ5BQ&oe=628A423D&_nc_sid=6136e7 /Users/shixuesen/OneDrive/Pictures/instagram/mobe_carrie0223/ https://scontent-sjc3-1.cdninstagram.com/v/t50.16885-16/10000000_575290063870295_2176192632399731540_n.mp4?efg=eyJ2ZW5jb2RlX3RhZyI6InZ0c192b2RfdXJsZ2VuLjk2MC5pZ3R2LmJhc2VsaW5lIn0&_nc_ht=scontent-sjc3-1.cdninstagram.com&_nc_cat=111&_nc_ohc=UdFKE-wi4ngAX-tv9eU&edm=ABmJApABAAAA&vs=721070428925146_1808951524&_nc_vs=HBksFQAYJEdJQ1dtQUJYV2VZbk9Rc0NBRlFiQWxHcll6TWVidlZCQUFBRhUAAsgBABUAGCRHQXd2d3hCWlJHekE1d3NDQUtSTnFCX1BYUk1lYnZWQkFBQUYVAgLIAQAoABgAGwGIB3VzZV9vaWwBMRUAACa%2B9NCAv8LZPxUCKAJDMywXQGbQAAAAAAAYEmRhc2hfYmFzZWxpbmVfMV92MREAdewHAA%3D%3D&_nc_rid=800da169db&ccb=7-4&oe=62858148&oh=00_AT_XGuuHttTEnM3Eqcdz8jPhmgPo4LU2j_9Gho5enXIjPA&_nc_sid=6136e7 /Users/shixuesen/OneDrive/Pictures/instagram/Likes_new/ https://scontent-sjc3-1.cdninstagram.com/v/t51.2885-15/281384435_331717812436116_1816666054432280503_n.jpg?se=7&stp=dst-jpg_e35&cb=9ad74b5e-88ad7ee8&_nc_ht=scontent-sjc3-1.cdninstagram.com&_nc_cat=100&_nc_ohc=pwqZ2ztM-NIAX-lLVW9&edm=AJ9x6zYBAAAA&ccb=7-4&ig_cache_key=Mjg0MDgxMTgzMjQ2MzAwMzAzOA%3D%3D.2-ccb7-4&oh=00_AT_I6GONjEzcTS_guMhnemH-o4P96PCo5uW2s-S2PhrDaA&oe=628CA8DD&_nc_sid=cff2a4 +/Users/shixuesen/OneDrive/Pictures/instagram/na0912mi/ https://scontent-lax3-1.cdninstagram.com/v/t51.2885-15/61186834_839138086451498_6688797702998385363_n.jpg?se=7&stp=dst-jpg_e35&cb=9ad74b5e-88ad7ee8&_nc_ht=scontent-lax3-1.cdninstagram.com&_nc_cat=102&_nc_ohc=HS4rzTAMAaoAX_u-JFD&edm=ABmJApABAAAA&ccb=7-5&ig_cache_key=MjA1MDk3MjA1NTI3NjU3Njk1OQ%3D%3D.2-ccb7-5&oh=00_AT-6-tRFfVUKekt0ypEFVSU18Tefgeaj35RwemlguKM0kQ&oe=629F8932&_nc_sid=6136e7 +/Users/shixuesen/OneDrive/Pictures/instagram/chen_01_24/ https://scontent-lax3-1.cdninstagram.com/v/t51.2885-15/173555113_158940469463752_1273918341459041977_n.jpg?se=7&stp=dst-jpg_e35&cb=9ad74b5e-88ad7ee8&_nc_ht=scontent-lax3-1.cdninstagram.com&_nc_cat=104&_nc_ohc=WwBCCfr7BPkAX8Zx5IW&edm=ABmJApABAAAA&ccb=7-5&ig_cache_key=MjU1MjM5NTU5MjE2MjQ5ODQyMA%3D%3D.2-ccb7-5&oh=00_AT-LQHrsJPVFwKzbWuy7tM3fZOgBVcHXecUYfCwTKgd9hA&oe=62A080D4&_nc_sid=6136e7 +/Users/shixuesen/OneDrive/Pictures/instagram/chen_01_24/ https://scontent-lax3-1.cdninstagram.com/v/t51.2885-15/172964141_447968899816612_2584625879344013922_n.jpg?se=7&stp=dst-jpg_e35&cb=9ad74b5e-88ad7ee8&_nc_ht=scontent-lax3-1.cdninstagram.com&_nc_cat=104&_nc_ohc=NYE1pAxy9Q8AX_XKoBa&edm=ABmJApABAAAA&ccb=7-5&ig_cache_key=MjU1MjM5NTU5MjIwNDQwNTg2MQ%3D%3D.2-ccb7-5&oh=00_AT_i5XSHCYqkDro9ptnOOW-BAAmGUWWZxY2DL42uLImuZw&oe=62A031CD&_nc_sid=6136e7 +/Users/shixuesen/OneDrive/Pictures/instagram/chen_01_24/ https://scontent-lax3-1.cdninstagram.com/v/t51.2885-15/173115739_2873810652872924_338860103281055918_n.jpg?se=7&stp=dst-jpg_e35&cb=9ad74b5e-88ad7ee8&_nc_ht=scontent-lax3-1.cdninstagram.com&_nc_cat=108&_nc_ohc=UCYIbakzuAMAX_z1D2A&edm=ABmJApABAAAA&ccb=7-5&ig_cache_key=MjU1MTY0ODc0Mjg1ODAyMzk0NQ%3D%3D.2-ccb7-5&oh=00_AT_TwbDnLucvpJQzLM767E2Qy4JYC0-PwF-RCjzFjmaOZQ&oe=62A0416D&_nc_sid=6136e7 +/Users/shixuesen/OneDrive/Pictures/instagram/chen_01_24/ https://scontent-lax3-2.cdninstagram.com/v/t51.2885-15/173372067_290935769325108_1455377889709464537_n.jpg?se=7&stp=dst-jpg_e35&cb=9ad74b5e-88ad7ee8&_nc_ht=scontent-lax3-2.cdninstagram.com&_nc_cat=100&_nc_ohc=oBtXfuDummwAX8ZjX4A&edm=ABmJApABAAAA&ccb=7-5&ig_cache_key=MjU1MTY0ODc0MjgyNDMyNDMwOQ%3D%3D.2-ccb7-5&oh=00_AT9N3GuGpbEUTZ4YjjeaoEc_ENJbg1w0hWAEqeXfkzFBnQ&oe=62A012EC&_nc_sid=6136e7 +/Users/shixuesen/OneDrive/Pictures/instagram/chen_01_24/ https://scontent-lax3-1.cdninstagram.com/v/t51.2885-15/173525602_896032524297586_2777976376145060666_n.jpg?se=7&stp=dst-jpg_e35&cb=9ad74b5e-88ad7ee8&_nc_ht=scontent-lax3-1.cdninstagram.com&_nc_cat=110&_nc_ohc=wln3zIBOTYIAX8jGzWi&edm=ABmJApABAAAA&ccb=7-5&ig_cache_key=MjU1MTY0ODc0MjgzMjc0MDM2MA%3D%3D.2-ccb7-5&oh=00_AT9DOzOvYpoSSf205LgfKhTkvi96g0YucE3ij94AeOhrSA&oe=629FB367&_nc_sid=6136e7 +/Users/shixuesen/OneDrive/Pictures/instagram/chen_01_24/ https://scontent-lax3-1.cdninstagram.com/v/t51.2885-15/173144690_192316432527958_2445310581992088854_n.jpg?se=7&stp=dst-jpg_e35&cb=9ad74b5e-88ad7ee8&_nc_ht=scontent-lax3-1.cdninstagram.com&_nc_cat=110&_nc_ohc=UlVWvNJZtXEAX9cvxeq&edm=ABmJApABAAAA&ccb=7-5&ig_cache_key=MjU1MTY0ODc0Mjg0OTYzMDA0Mw%3D%3D.2-ccb7-5&oh=00_AT8lxJx_L1pu2MDdf4eZi_wQ1N6k2RHX91PnLJH7aYLpPQ&oe=62A1990F&_nc_sid=6136e7 +/Users/shixuesen/OneDrive/Pictures/instagram/chen_01_24/ https://scontent-lax3-2.cdninstagram.com/v/t51.2885-15/172017493_205532384351931_4365940484017501301_n.jpg?se=7&stp=dst-jpg_e35&cb=9ad74b5e-88ad7ee8&_nc_ht=scontent-lax3-2.cdninstagram.com&_nc_cat=103&_nc_ohc=UqL14M6Fel0AX8WA2n4&edm=ABmJApABAAAA&ccb=7-5&ig_cache_key=MjU1MDk0MzM5NTY3OTY2NzU5OA%3D%3D.2-ccb7-5&oh=00_AT-9gAzMHHO6n_OdqwTgtayZHPyG4OzNPuT2rZC7Pdppow&oe=62A11966&_nc_sid=6136e7 +/Users/shixuesen/OneDrive/Pictures/instagram/chen_01_24/ https://scontent-lax3-1.cdninstagram.com/v/t51.2885-15/172723871_893234137887501_5067974151819050692_n.jpg?se=7&stp=dst-jpg_e35&cb=9ad74b5e-88ad7ee8&_nc_ht=scontent-lax3-1.cdninstagram.com&_nc_cat=109&_nc_ohc=nkRo6X4xgT8AX_P1LwF&edm=ABmJApABAAAA&ccb=7-5&ig_cache_key=MjU1MDk0MzM5NTU4NzQ5MzQ4NA%3D%3D.2-ccb7-5&oh=00_AT-vPkJcpo7c4YII6R61COswJ20ubHq3-6mGqj0q0L0mxg&oe=62A14CCF&_nc_sid=6136e7 +/Users/shixuesen/OneDrive/Pictures/instagram/chen_01_24/ https://scontent-lax3-2.cdninstagram.com/v/t51.2885-15/172023579_584149625877295_524181339794551530_n.jpg?se=7&stp=dst-jpg_e35&cb=9ad74b5e-88ad7ee8&_nc_ht=scontent-lax3-2.cdninstagram.com&_nc_cat=107&_nc_ohc=__cq2hZhscgAX8RQuaM&edm=ABmJApABAAAA&ccb=7-5&ig_cache_key=MjU1MDk0MzM5NTU2MjM2NTkzMw%3D%3D.2-ccb7-5&oh=00_AT-qjhBnh-7tO_NKKUE-mXZo8eoOznhtRr_QHg-eoenaFw&oe=62A0480B&_nc_sid=6136e7 +/Users/shixuesen/OneDrive/Pictures/instagram/chen_01_24/ https://scontent-lax3-1.cdninstagram.com/v/t51.2885-15/171719415_211659767399393_4429955357558979093_n.jpg?se=7&stp=dst-jpg_e35&cb=9ad74b5e-88ad7ee8&_nc_ht=scontent-lax3-1.cdninstagram.com&_nc_cat=105&_nc_ohc=GawmD2IxEs0AX80cCDx&edm=ABmJApABAAAA&ccb=7-5&ig_cache_key=MjU1MDk0MzM5NTU3OTE1Njg3MA%3D%3D.2-ccb7-5&oh=00_AT-HdXp09qUSLWIVYnT1EDXpDn5fSJzxwsa1RxQQmCdH-A&oe=629FAEE7&_nc_sid=6136e7 +/Users/shixuesen/OneDrive/Pictures/instagram/chen_01_24/ https://scontent-lax3-1.cdninstagram.com/v/t51.2885-15/172334246_806047096931685_4756653144307888773_n.jpg?se=7&stp=dst-jpg_e35&cb=9ad74b5e-88ad7ee8&_nc_ht=scontent-lax3-1.cdninstagram.com&_nc_cat=109&_nc_ohc=M8sa_AyFVSQAX9j7yNz&edm=ABmJApABAAAA&ccb=7-5&ig_cache_key=MjU1MDk0MzM5NTU4NzQ5NDIwNw%3D%3D.2-ccb7-5&oh=00_AT95MmNpainEC-xaVDzCNpuHfpYWkXaGS1K_xILTO38NUw&oe=629FF886&_nc_sid=6136e7 +/Users/shixuesen/OneDrive/Pictures/instagram/chen_01_24/ https://scontent-lax3-1.cdninstagram.com/v/t51.2885-15/170241954_760808061306190_5646851458076937382_n.jpg?se=7&stp=dst-jpg_e35&cb=9ad74b5e-88ad7ee8&_nc_ht=scontent-lax3-1.cdninstagram.com&_nc_cat=110&_nc_ohc=Gte0wJik8KEAX8kt9FI&edm=ABmJApABAAAA&ccb=7-5&ig_cache_key=MjU0ODc3ODQ5NTExNzE5MTIyMQ%3D%3D.2-ccb7-5&oh=00_AT9ekl81QEFFSwZIDj7NwcHGI45KuwyxLITw64XZh0mWDA&oe=62A0FA88&_nc_sid=6136e7