Browse Source

feature: fix format query error

feature/new_bilibili_and_instagram_sxs20191126
shixuesen 3 years ago
parent
commit
53b87f383d
6 changed files with 171 additions and 15 deletions
  1. +22
    -12
      app/Console/Commands/InstagramScrape.php
  2. +4
    -0
      app/Services/BilibiliServiceV2.php
  3. +3
    -1
      app/Services/FfmpegService.php
  4. +127
    -0
      app/Services/InstagramService.php
  5. +2
    -2
      app/Services/NewNvshenService.php
  6. +13
    -0
      fail.log

+ 22
- 12
app/Console/Commands/InstagramScrape.php View File

@ -5,6 +5,7 @@ namespace App\Console\Commands;
use App\Services\Ins24Service;
use App\Services\InstagramService;
use Illuminate\Console\Command;
use Illuminate\Support\Facades\Redis;
class InstagramScrape extends Command
{
@ -13,7 +14,7 @@ class InstagramScrape extends Command
*
* @var string
*/
protected $signature = 'ins:like {type} {start}';
protected $signature = 'ins:like';
/**
* The console command description.
@ -39,12 +40,9 @@ class InstagramScrape extends Command
*/
public function handle()
{
// $dir = "/aaa/bbb";
// $dir = str_replace("/", "", $dir);
// echo $dir;exit;
//
$start = $this->argument('start');
$type = $this->argument('type');
// $start = $this->argument('start');
// $type = $this->argument('type');
// echo $start;exit;
// $userList = file("/Users/shixuesen/OneDrive/Pictures/instagram/user.txt");
// $userList = array_slice($userList, $start, 65);
@ -52,16 +50,28 @@ class InstagramScrape extends Command
// print_r($userList);exit;
$ins = new InstagramService();
// $ins->getUserNameById('4156629214');exit;
if ($type == "feed") {
# code...
$ins->scrapeUsersInFile("/Users/shixuesen/OneDrive/Pictures/instagram/user_0.txt");exit;
$lastInsStart = Redis::connection()->get("last_ins_start");
dump("lastInsStart is $lastInsStart");
$lastQueryType = Redis::connection()->get("last_query_type");
dump("lastQueryType is $lastQueryType");
if ($lastQueryType == null || $lastQueryType == "user") {
$ins->scrapeFeeds();
exit;
Redis::connection()->set("last_query_type", "feed");
} else {
$ins->scrapeUsers($start);
if ($lastInsStart == null) {
$lastInsStart = 0;
} else {
$lastInsStart = 54 - (int)$lastInsStart;
}
Redis::connection()->set("last_ins_start", $lastInsStart);
Redis::connection()->set("last_query_type", "user");
$ins->scrapeUsers($lastInsStart);
$ins->scrapeLikedUsers();
$ins->scrapeCollection();
exit;
}
exit;
// $ins->scrapeFeeds();exit;
$ins->scrapeUsers($start);
$ins->scrapeLikedUsers();


+ 4
- 0
app/Services/BilibiliServiceV2.php View File

@ -977,6 +977,10 @@ done && echo "ok"');
$formatInfoShellResult = shell_exec($formatInfoShell);
$formatInfoList = json_decode($formatInfoShellResult, true);
$formatArray = [];
if (!is_array($formatInfoList)) {
Log::error("$aid format info error " . $formatInfoShellResult);
break;
}
foreach ($formatInfoList as $formatInfo) {
$keys = array_keys($formatInfo["streams"]);
$largeHev1FormatCode = -1;


+ 3
- 1
app/Services/FfmpegService.php View File

@ -21,6 +21,8 @@ class FfmpegService
private $needRemoveExistFiles = false;
const DEFAULT_EXTENSION = "mp4";
public function __construct()
{
$this->mediainfo = new MediaInfo();
@ -167,7 +169,7 @@ class FfmpegService
if (ends_with($fileInfo["filename"], "-x265")) {
return;
}
$targetFile = $fileInfo["dirname"] . '/' .$fileInfo["filename"] . '-x265'. '.' . $fileInfo["extension"];
$targetFile = $fileInfo["dirname"] . DIRECTORY_SEPARATOR .$fileInfo["filename"] . '-x265'. '.' . self::DEFAULT_EXTENSION;
if (is_file($targetFile) && $this->isNeedRemoveExistFiles()) {
Log::info("$targetFile is exists");
unlink($pathFile);


+ 127
- 0
app/Services/InstagramService.php View File

@ -549,6 +549,133 @@ class InstagramService
}
}
/**
 * Download story videos and timeline media for every user listed in a file.
 *
 * The file is read line by line; each non-empty line is treated as an
 * Instagram user name. For every user a sub-directory named after the user
 * is created under the base image directory, then:
 *   1. videos from the user's current story reel are downloaded;
 *   2. the user's timeline feed is paged through and photos, videos and
 *      carousel images are downloaded, stopping once more than 500 timeline
 *      items have been processed for that user.
 *
 * Failures for a single user are logged and the next user is processed.
 * Any other exception is caught, printed to stdout and ends the run.
 *
 * @param string $file Path to a text file with one user name per line.
 *
 * @return void
 */
public function scrapeUsersInFile($file)
{
    $baseImageDir = "/Users/shixuesen/OneDrive/Pictures/instagram/";
    try {
        $userList = (is_file($file) && is_readable($file)) ? file($file) : false;
        if ($userList === false) {
            Log::error("ins scrape users in file error: cannot read user list file " . $file);
            return;
        }
        foreach ($userList as $userName) {
            $trueName = trim($userName);
            if ($trueName === '') {
                // Blank line (e.g. trailing newline at end of file): nothing to scrape.
                continue;
            }
            $thisUserImageDir = $baseImageDir . $trueName . DIRECTORY_SEPARATOR;
            // Re-check is_dir() after a failed mkdir() in case another process created it meanwhile.
            if (!is_dir($thisUserImageDir) && !mkdir($thisUserImageDir, 0777, true) && !is_dir($thisUserImageDir)) {
                Log::error("ins create user dir error: " . $thisUserImageDir);
                continue;
            }
            try {
                $userId = $this->ig->people->getUserIdForName($trueName);
            } catch (Exception $e) {
                Log::error("ins get user id for name error: " . $e->getMessage() . " username is " . $trueName);
                // Fall back to the locally known users on this service.
                // NOTE(review): assumes $this->userList maps user name => user id — confirm.
                $userId = null;
                if (array_key_exists($trueName, $this->userList)) {
                    $userId = Arr::get($this->userList, $trueName);
                }
                if ($userId == null) {
                    continue;
                }
            }
            echo "\n username: " . $trueName . " " . $userId . "\n";

            // Story reel: only video items are downloaded here.
            $storyResponse = null;
            try {
                $storyResponse = $this->ig->story->getUserReelMediaFeed($userId);
            } catch (Exception $e) {
                Log::error("current user has error, $trueName, " . $e->getMessage());
            }
            if ($storyResponse !== null) {
                foreach ($storyResponse->getItems() as $item) {
                    if (Item::VIDEO == $item->getMediaType()) {
                        $videoUrl = $item->getVideoVersions()[0]->getUrl();
                        $this->downloadFile($videoUrl, 0, $thisUserImageDir);
                    }
                }
            }

            $userItemNum = 0;
            sleep(5 * random_int(1, 10));

            // Timeline feed: a null maxId requests the first page.
            $maxId = null;
            do {
                echo "\n current maxId: " . $maxId;
                try {
                    $response = $this->ig->timeline->getUserFeed($userId, $maxId);
                } catch (Exception $e) {
                    Log::error("current user has error, $trueName, " . $e->getMessage());
                    // Give up on this user and move on to the next one in the file.
                    continue 2;
                }
                foreach ($response->getItems() as $item) {
                    switch ($item->getMediaType()) {
                        case Item::PHOTO:
                            $imageUrl = $item->getImageVersions2()->getCandidates()[0]->getUrl();
                            $this->downloadFile($imageUrl, 0, $thisUserImageDir);
                            break;
                        case Item::VIDEO:
                            $videoUrl = $item->getVideoVersions()[0]->getUrl();
                            $this->downloadFile($videoUrl, 0, $thisUserImageDir);
                            break;
                        case Item::CAROUSEL:
                            foreach ($item->getCarouselMedia() as $imageItem) {
                                $imageUrl = $imageItem->getImageVersions2()->getCandidates()[0]->getUrl();
                                $this->downloadFile($imageUrl, 0, $thisUserImageDir);
                            }
                            break;
                    }
                    // Randomised pause between items to keep the request rate low.
                    sleep(5 * random_int(1, 10));
                    $userItemNum++;
                    if ($userItemNum > 500) {
                        // Per-user cap reached: leave both the item loop and the
                        // pagination loop, otherwise further pages keep being requested.
                        break 2;
                    }
                }
                // getNextMaxId() is null once the last page has been reached,
                // which ends the pagination loop below.
                $maxId = $response->getNextMaxId();
                echo "\n new maxId: " . $maxId . "\n";
                // Pause between page requests so the API does not throttle the account.
                echo "\n Sleeping for 10s...\n";
                sleep(10);
            } while ($maxId !== null); // Must use "!==" for comparison instead of "!=".
        }
    } catch (Exception $e) {
        echo 'scrapeUsersInFile something went wrong: ' . $e->getMessage() . "\n" . $e->getTraceAsString() . "\n";
    }
}
public function scrapeCollection()
{


+ 2
- 2
app/Services/NewNvshenService.php View File

@ -169,7 +169,7 @@ class NewNvshenService
} else {
$albumQl = $mainQl->get($baseAlbumUrl . $i . ".html");
}
}
// dump($albumQl->getHtml());
$albumList = $albumQl->find(".igalleryli > .igalleryli_div > .igalleryli_link")->attrs("href");
@ -192,7 +192,7 @@ class NewNvshenService
$page = $mainQl->get($baseUrl . $album, [], ['maxTry' => 5])->find(".albumInfo > span")->htmls();
$title = $mainQl->get($baseUrl . $album, [], ['maxTry' => 5])->find(".albumTitle > #htilte")->htmls();
$imageSource = $mainQl->get($baseUrl . $album, [], ['maxTry' => 5])->find("#hgallery img:nth-child(1)")->attr("src");
dump($imageSource);exit;
dump($imageSource);
$imageSourceParts = pathinfo($imageSource, PATHINFO_DIRNAME);
break;
} catch (ConnectException $e) {


+ 13
- 0
fail.log View File

@ -2448,3 +2448,16 @@
/Users/shixuesen/OneDrive/Pictures/instagram/limerencem_/ https://scontent-sjc3-1.cdninstagram.com/v/t51.2885-15/280690646_420109199537856_7984527567121575817_n.jpg?se=7&stp=dst-jpg_e35&_nc_ht=scontent-sjc3-1.cdninstagram.com&_nc_cat=100&_nc_ohc=MDdTeJadha4AX8oxxz5&edm=ABmJApABAAAA&ccb=7-4&ig_cache_key=MjgzNjYwOTY5MjMyODA5NzM4NQ%3D%3D.2-ccb7-4&oh=00_AT8YPHu1oqtmBmUyYfIBYvG27wVJaybjNXT0gE7aaJQ5BQ&oe=628A423D&_nc_sid=6136e7
/Users/shixuesen/OneDrive/Pictures/instagram/mobe_carrie0223/ https://scontent-sjc3-1.cdninstagram.com/v/t50.16885-16/10000000_575290063870295_2176192632399731540_n.mp4?efg=eyJ2ZW5jb2RlX3RhZyI6InZ0c192b2RfdXJsZ2VuLjk2MC5pZ3R2LmJhc2VsaW5lIn0&_nc_ht=scontent-sjc3-1.cdninstagram.com&_nc_cat=111&_nc_ohc=UdFKE-wi4ngAX-tv9eU&edm=ABmJApABAAAA&vs=721070428925146_1808951524&_nc_vs=HBksFQAYJEdJQ1dtQUJYV2VZbk9Rc0NBRlFiQWxHcll6TWVidlZCQUFBRhUAAsgBABUAGCRHQXd2d3hCWlJHekE1d3NDQUtSTnFCX1BYUk1lYnZWQkFBQUYVAgLIAQAoABgAGwGIB3VzZV9vaWwBMRUAACa%2B9NCAv8LZPxUCKAJDMywXQGbQAAAAAAAYEmRhc2hfYmFzZWxpbmVfMV92MREAdewHAA%3D%3D&_nc_rid=800da169db&ccb=7-4&oe=62858148&oh=00_AT_XGuuHttTEnM3Eqcdz8jPhmgPo4LU2j_9Gho5enXIjPA&_nc_sid=6136e7
/Users/shixuesen/OneDrive/Pictures/instagram/Likes_new/ https://scontent-sjc3-1.cdninstagram.com/v/t51.2885-15/281384435_331717812436116_1816666054432280503_n.jpg?se=7&stp=dst-jpg_e35&cb=9ad74b5e-88ad7ee8&_nc_ht=scontent-sjc3-1.cdninstagram.com&_nc_cat=100&_nc_ohc=pwqZ2ztM-NIAX-lLVW9&edm=AJ9x6zYBAAAA&ccb=7-4&ig_cache_key=Mjg0MDgxMTgzMjQ2MzAwMzAzOA%3D%3D.2-ccb7-4&oh=00_AT_I6GONjEzcTS_guMhnemH-o4P96PCo5uW2s-S2PhrDaA&oe=628CA8DD&_nc_sid=cff2a4
/Users/shixuesen/OneDrive/Pictures/instagram/na0912mi/ https://scontent-lax3-1.cdninstagram.com/v/t51.2885-15/61186834_839138086451498_6688797702998385363_n.jpg?se=7&stp=dst-jpg_e35&cb=9ad74b5e-88ad7ee8&_nc_ht=scontent-lax3-1.cdninstagram.com&_nc_cat=102&_nc_ohc=HS4rzTAMAaoAX_u-JFD&edm=ABmJApABAAAA&ccb=7-5&ig_cache_key=MjA1MDk3MjA1NTI3NjU3Njk1OQ%3D%3D.2-ccb7-5&oh=00_AT-6-tRFfVUKekt0ypEFVSU18Tefgeaj35RwemlguKM0kQ&oe=629F8932&_nc_sid=6136e7
/Users/shixuesen/OneDrive/Pictures/instagram/chen_01_24/ https://scontent-lax3-1.cdninstagram.com/v/t51.2885-15/173555113_158940469463752_1273918341459041977_n.jpg?se=7&stp=dst-jpg_e35&cb=9ad74b5e-88ad7ee8&_nc_ht=scontent-lax3-1.cdninstagram.com&_nc_cat=104&_nc_ohc=WwBCCfr7BPkAX8Zx5IW&edm=ABmJApABAAAA&ccb=7-5&ig_cache_key=MjU1MjM5NTU5MjE2MjQ5ODQyMA%3D%3D.2-ccb7-5&oh=00_AT-LQHrsJPVFwKzbWuy7tM3fZOgBVcHXecUYfCwTKgd9hA&oe=62A080D4&_nc_sid=6136e7
/Users/shixuesen/OneDrive/Pictures/instagram/chen_01_24/ https://scontent-lax3-1.cdninstagram.com/v/t51.2885-15/172964141_447968899816612_2584625879344013922_n.jpg?se=7&stp=dst-jpg_e35&cb=9ad74b5e-88ad7ee8&_nc_ht=scontent-lax3-1.cdninstagram.com&_nc_cat=104&_nc_ohc=NYE1pAxy9Q8AX_XKoBa&edm=ABmJApABAAAA&ccb=7-5&ig_cache_key=MjU1MjM5NTU5MjIwNDQwNTg2MQ%3D%3D.2-ccb7-5&oh=00_AT_i5XSHCYqkDro9ptnOOW-BAAmGUWWZxY2DL42uLImuZw&oe=62A031CD&_nc_sid=6136e7
/Users/shixuesen/OneDrive/Pictures/instagram/chen_01_24/ https://scontent-lax3-1.cdninstagram.com/v/t51.2885-15/173115739_2873810652872924_338860103281055918_n.jpg?se=7&stp=dst-jpg_e35&cb=9ad74b5e-88ad7ee8&_nc_ht=scontent-lax3-1.cdninstagram.com&_nc_cat=108&_nc_ohc=UCYIbakzuAMAX_z1D2A&edm=ABmJApABAAAA&ccb=7-5&ig_cache_key=MjU1MTY0ODc0Mjg1ODAyMzk0NQ%3D%3D.2-ccb7-5&oh=00_AT_TwbDnLucvpJQzLM767E2Qy4JYC0-PwF-RCjzFjmaOZQ&oe=62A0416D&_nc_sid=6136e7
/Users/shixuesen/OneDrive/Pictures/instagram/chen_01_24/ https://scontent-lax3-2.cdninstagram.com/v/t51.2885-15/173372067_290935769325108_1455377889709464537_n.jpg?se=7&stp=dst-jpg_e35&cb=9ad74b5e-88ad7ee8&_nc_ht=scontent-lax3-2.cdninstagram.com&_nc_cat=100&_nc_ohc=oBtXfuDummwAX8ZjX4A&edm=ABmJApABAAAA&ccb=7-5&ig_cache_key=MjU1MTY0ODc0MjgyNDMyNDMwOQ%3D%3D.2-ccb7-5&oh=00_AT9N3GuGpbEUTZ4YjjeaoEc_ENJbg1w0hWAEqeXfkzFBnQ&oe=62A012EC&_nc_sid=6136e7
/Users/shixuesen/OneDrive/Pictures/instagram/chen_01_24/ https://scontent-lax3-1.cdninstagram.com/v/t51.2885-15/173525602_896032524297586_2777976376145060666_n.jpg?se=7&stp=dst-jpg_e35&cb=9ad74b5e-88ad7ee8&_nc_ht=scontent-lax3-1.cdninstagram.com&_nc_cat=110&_nc_ohc=wln3zIBOTYIAX8jGzWi&edm=ABmJApABAAAA&ccb=7-5&ig_cache_key=MjU1MTY0ODc0MjgzMjc0MDM2MA%3D%3D.2-ccb7-5&oh=00_AT9DOzOvYpoSSf205LgfKhTkvi96g0YucE3ij94AeOhrSA&oe=629FB367&_nc_sid=6136e7
/Users/shixuesen/OneDrive/Pictures/instagram/chen_01_24/ https://scontent-lax3-1.cdninstagram.com/v/t51.2885-15/173144690_192316432527958_2445310581992088854_n.jpg?se=7&stp=dst-jpg_e35&cb=9ad74b5e-88ad7ee8&_nc_ht=scontent-lax3-1.cdninstagram.com&_nc_cat=110&_nc_ohc=UlVWvNJZtXEAX9cvxeq&edm=ABmJApABAAAA&ccb=7-5&ig_cache_key=MjU1MTY0ODc0Mjg0OTYzMDA0Mw%3D%3D.2-ccb7-5&oh=00_AT8lxJx_L1pu2MDdf4eZi_wQ1N6k2RHX91PnLJH7aYLpPQ&oe=62A1990F&_nc_sid=6136e7
/Users/shixuesen/OneDrive/Pictures/instagram/chen_01_24/ https://scontent-lax3-2.cdninstagram.com/v/t51.2885-15/172017493_205532384351931_4365940484017501301_n.jpg?se=7&stp=dst-jpg_e35&cb=9ad74b5e-88ad7ee8&_nc_ht=scontent-lax3-2.cdninstagram.com&_nc_cat=103&_nc_ohc=UqL14M6Fel0AX8WA2n4&edm=ABmJApABAAAA&ccb=7-5&ig_cache_key=MjU1MDk0MzM5NTY3OTY2NzU5OA%3D%3D.2-ccb7-5&oh=00_AT-9gAzMHHO6n_OdqwTgtayZHPyG4OzNPuT2rZC7Pdppow&oe=62A11966&_nc_sid=6136e7
/Users/shixuesen/OneDrive/Pictures/instagram/chen_01_24/ https://scontent-lax3-1.cdninstagram.com/v/t51.2885-15/172723871_893234137887501_5067974151819050692_n.jpg?se=7&stp=dst-jpg_e35&cb=9ad74b5e-88ad7ee8&_nc_ht=scontent-lax3-1.cdninstagram.com&_nc_cat=109&_nc_ohc=nkRo6X4xgT8AX_P1LwF&edm=ABmJApABAAAA&ccb=7-5&ig_cache_key=MjU1MDk0MzM5NTU4NzQ5MzQ4NA%3D%3D.2-ccb7-5&oh=00_AT-vPkJcpo7c4YII6R61COswJ20ubHq3-6mGqj0q0L0mxg&oe=62A14CCF&_nc_sid=6136e7
/Users/shixuesen/OneDrive/Pictures/instagram/chen_01_24/ https://scontent-lax3-2.cdninstagram.com/v/t51.2885-15/172023579_584149625877295_524181339794551530_n.jpg?se=7&stp=dst-jpg_e35&cb=9ad74b5e-88ad7ee8&_nc_ht=scontent-lax3-2.cdninstagram.com&_nc_cat=107&_nc_ohc=__cq2hZhscgAX8RQuaM&edm=ABmJApABAAAA&ccb=7-5&ig_cache_key=MjU1MDk0MzM5NTU2MjM2NTkzMw%3D%3D.2-ccb7-5&oh=00_AT-qjhBnh-7tO_NKKUE-mXZo8eoOznhtRr_QHg-eoenaFw&oe=62A0480B&_nc_sid=6136e7
/Users/shixuesen/OneDrive/Pictures/instagram/chen_01_24/ https://scontent-lax3-1.cdninstagram.com/v/t51.2885-15/171719415_211659767399393_4429955357558979093_n.jpg?se=7&stp=dst-jpg_e35&cb=9ad74b5e-88ad7ee8&_nc_ht=scontent-lax3-1.cdninstagram.com&_nc_cat=105&_nc_ohc=GawmD2IxEs0AX80cCDx&edm=ABmJApABAAAA&ccb=7-5&ig_cache_key=MjU1MDk0MzM5NTU3OTE1Njg3MA%3D%3D.2-ccb7-5&oh=00_AT-HdXp09qUSLWIVYnT1EDXpDn5fSJzxwsa1RxQQmCdH-A&oe=629FAEE7&_nc_sid=6136e7
/Users/shixuesen/OneDrive/Pictures/instagram/chen_01_24/ https://scontent-lax3-1.cdninstagram.com/v/t51.2885-15/172334246_806047096931685_4756653144307888773_n.jpg?se=7&stp=dst-jpg_e35&cb=9ad74b5e-88ad7ee8&_nc_ht=scontent-lax3-1.cdninstagram.com&_nc_cat=109&_nc_ohc=M8sa_AyFVSQAX9j7yNz&edm=ABmJApABAAAA&ccb=7-5&ig_cache_key=MjU1MDk0MzM5NTU4NzQ5NDIwNw%3D%3D.2-ccb7-5&oh=00_AT95MmNpainEC-xaVDzCNpuHfpYWkXaGS1K_xILTO38NUw&oe=629FF886&_nc_sid=6136e7
/Users/shixuesen/OneDrive/Pictures/instagram/chen_01_24/ https://scontent-lax3-1.cdninstagram.com/v/t51.2885-15/170241954_760808061306190_5646851458076937382_n.jpg?se=7&stp=dst-jpg_e35&cb=9ad74b5e-88ad7ee8&_nc_ht=scontent-lax3-1.cdninstagram.com&_nc_cat=110&_nc_ohc=Gte0wJik8KEAX8kt9FI&edm=ABmJApABAAAA&ccb=7-5&ig_cache_key=MjU0ODc3ODQ5NTExNzE5MTIyMQ%3D%3D.2-ccb7-5&oh=00_AT9ekl81QEFFSwZIDj7NwcHGI45KuwyxLITw64XZh0mWDA&oe=62A0FA88&_nc_sid=6136e7

Loading…
Cancel
Save