|
|
@ -549,6 +549,133 @@ class InstagramService |
|
|
} |
|
|
} |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
/**
 * Download story videos and timeline media for every account listed in a file.
 *
 * The file is read as one username per line. For each non-empty name a
 * per-user directory is created below $baseImageDir, the user id is looked
 * up, story videos are downloaded, and then the timeline feed is paged
 * through with every photo, video and carousel image handed to
 * downloadFile(). Sleeps are inserted between requests.
 *
 * Failures to resolve a user or to fetch a feed are logged and the user (or
 * that part of the work) is skipped; any other exception aborts the run and
 * is printed by the outer catch.
 *
 * @param string $file         Path to a text file with one username per line.
 * @param string $baseImageDir Directory receiving one sub-directory per user.
 *                             Defaults to the previously hard-coded location.
 *
 * @return void
 */
public function scrapeUsersInFile($file, $baseImageDir = "/Users/shixuesen/OneDrive/Pictures/instagram/")
{
    // Make sure the base directory can be concatenated with a user name.
    if ($baseImageDir !== '' && !in_array(substr($baseImageDir, -1), ['/', '\\'], true)) {
        $baseImageDir .= DIRECTORY_SEPARATOR;
    }

    try {
        $userList = file($file, FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES);
        if ($userList === false) {
            Log::error("ins scrape users: unable to read user file " . $file);

            return;
        }

        foreach ($userList as $userName) {
            $trueName = trim($userName);
            if ($trueName === '') {
                // A whitespace-only line would otherwise resolve to the base directory itself.
                continue;
            }

            $thisUserImageDir = $baseImageDir . $trueName . DIRECTORY_SEPARATOR;
            // The second is_dir() covers a directory created concurrently between check and mkdir.
            if (!is_dir($thisUserImageDir)
                && !mkdir($thisUserImageDir, 0777, true)
                && !is_dir($thisUserImageDir)
            ) {
                Log::error("ins scrape users: unable to create directory " . $thisUserImageDir);
                continue;
            }

            $userId = $this->resolveScrapeUserId($trueName);
            if ($userId == null) {
                continue;
            }

            echo "\n username: " . $trueName . " " . $userId . "\n";

            $this->downloadStoryVideos($userId, $trueName, $thisUserImageDir);

            // Pause between the story request and the first timeline request.
            sleep(5 * random_int(1, 10));

            $this->downloadTimelineMedia($userId, $trueName, $thisUserImageDir);
        }
    } catch (Exception $e) {
        echo 'scrapeUsers something went wrong: ' . $e->getTraceAsString() . "\n";
    }
}

/**
 * Resolve a username to its user id.
 *
 * Asks the API first; when that throws, the error is logged and the id is
 * taken from $this->userList (keyed by username) if an entry exists there.
 *
 * @param string $trueName Trimmed username.
 *
 * @return mixed|null The user id, or null when it cannot be determined.
 */
private function resolveScrapeUserId($trueName)
{
    try {
        return $this->ig->people->getUserIdForName($trueName);
    } catch (Exception $e) {
        Log::error("ins get user id for name error: " . $e->getMessage() . " username is " . $trueName);
    }

    // Fall back to the same map that is checked for the key; the previous
    // code looked the id up in the list of file lines and never found it.
    if (array_key_exists($trueName, $this->userList)) {
        return $this->userList[$trueName];
    }

    return null;
}

/**
 * Download every video found in the user's story feed.
 *
 * When the story request throws, the error is logged and nothing is
 * downloaded, so a stale or undefined response is never iterated.
 *
 * @param mixed  $userId    User id as returned by the API.
 * @param string $userName  Username, used only in log messages.
 * @param string $targetDir Directory passed on to downloadFile().
 *
 * @return void
 */
private function downloadStoryVideos($userId, $userName, $targetDir)
{
    try {
        $response = $this->ig->story->getUserReelMediaFeed($userId);
    } catch (Exception $e) {
        Log::error("current user has error, $userName, " . $e->getMessage());

        return;
    }

    // Guard against a missing item list so the loop never receives a non-iterable.
    $items = $response->getItems() ?: [];
    foreach ($items as $item) {
        // Loose comparison kept on purpose: it matches the switch used for timeline items.
        if (Item::VIDEO == $item->getMediaType()) {
            $videoUrl = $item->getVideoVersions()[0]->getUrl();
            $this->downloadFile($videoUrl, 0, $targetDir);
        }
    }
}

/**
 * Page through the user's timeline and download each item's media.
 *
 * Stops when the API reports no further page, when a page request throws
 * (logged), or once more than $maxItemsPerUser items have been processed.
 *
 * @param mixed  $userId    User id as returned by the API.
 * @param string $userName  Username, used only in log messages.
 * @param string $targetDir Directory passed on to downloadFile().
 *
 * @return void
 */
private function downloadTimelineMedia($userId, $userName, $targetDir)
{
    $maxItemsPerUser = 500;
    $pageDelaySeconds = 10;
    $userItemNum = 0;

    // Starting at null requests the first page.
    $maxId = null;
    do {
        echo "\n current maxId: " . $maxId;

        try {
            $response = $this->ig->timeline->getUserFeed($userId, $maxId);
        } catch (Exception $e) {
            Log::error("current user has error, $userName, " . $e->getMessage());

            // Give up on this user; the caller moves on to the next one.
            return;
        }

        $items = $response->getItems() ?: [];
        foreach ($items as $item) {
            switch ($item->getMediaType()) {
                case Item::PHOTO:
                    $imageUrl = $item->getImageVersions2()->getCandidates()[0]->getUrl();
                    $this->downloadFile($imageUrl, 0, $targetDir);
                    break;

                case Item::VIDEO:
                    $videoUrl = $item->getVideoVersions()[0]->getUrl();
                    $this->downloadFile($videoUrl, 0, $targetDir);
                    break;

                case Item::CAROUSEL:
                    foreach ($item->getCarouselMedia() as $imageItem) {
                        $imageUrl = $imageItem->getImageVersions2()->getCandidates()[0]->getUrl();
                        $this->downloadFile($imageUrl, 0, $targetDir);
                    }
                    break;
            }

            // Random pause after every item to keep the request rate low.
            sleep(5 * random_int(1, 10));

            $userItemNum++;
            if ($userItemNum > $maxItemsPerUser) {
                // Leave the pagination loop as well, not just the current page.
                return;
            }
        }

        // The next-page cursor is null once the last page has been reached.
        $maxId = $response->getNextMaxId();
        echo "\n new maxId: " . $maxId . "\n";

        echo "\n Sleeping for " . $pageDelaySeconds . "s...\n";
        sleep($pageDelaySeconds);
    } while ($maxId !== null); // Must use "!==" for comparison instead of "!=".
}
|
|
|
|
|
|
|
|
public function scrapeCollection() |
|
|
public function scrapeCollection() |
|
|
{ |
|
|
{ |
|
|
|
|
|
|
|
|
|