|
|
@ -10,6 +10,13 @@ use InstagramAPI\Response\Model\Item; |
|
|
|
|
|
|
|
|
class Ins24Service |
|
|
class Ins24Service |
|
|
{ |
|
|
{ |
|
|
|
|
|
|
|
|
|
|
|
/**
 * Append one failed download to the failure log.
 *
 * Writes a single line of the form "<filePrefix>\t<fileUrl>" followed by
 * PHP_EOL to "fail.log". The path is relative, so the file is resolved
 * against the current working directory of the process.
 *
 * @param mixed $filePrefix Value written in the first column (string-concatenated as-is).
 * @param mixed $fileUrl    Value written in the second column (string-concatenated as-is).
 *
 * @return bool True when the line was written, false when file_put_contents() reported failure.
 */
function logFailUrl($filePrefix, $fileUrl): bool
{
    $failLogFile = "fail.log";
    $line = $filePrefix . "\t" . $fileUrl . PHP_EOL;

    // FILE_APPEND keeps earlier entries; LOCK_EX requests an exclusive lock for the write.
    // file_put_contents() returns false on failure, so surface that instead of discarding it.
    return file_put_contents($failLogFile, $line, FILE_APPEND | LOCK_EX) !== false;
}
|
|
|
|
|
|
|
|
public function scrapeUsers() |
|
|
public function scrapeUsers() |
|
|
{ |
|
|
{ |
|
|
|
|
|
|
|
|
@ -18,12 +25,12 @@ class Ins24Service |
|
|
try { |
|
|
try { |
|
|
|
|
|
|
|
|
$userList = file("/Users/shixuesen/OneDrive/Pictures/instagram/user.txt"); |
|
|
$userList = file("/Users/shixuesen/OneDrive/Pictures/instagram/user.txt"); |
|
|
// $userList = array_slice($userList, 17);
|
|
|
|
|
|
|
|
|
$userList = array_slice($userList, 24); |
|
|
// $userList = ['1992.ai_'];
|
|
|
// $userList = ['1992.ai_'];
|
|
|
// print_r($userList);exit;
|
|
|
|
|
|
|
|
|
// print_r($userList);exit;
|
|
|
// print_r($userList);
|
|
|
// print_r($userList);
|
|
|
foreach ($userList as $userName) { |
|
|
foreach ($userList as $userName) { |
|
|
usleep(random_int(100, 1000) * 10000); |
|
|
|
|
|
|
|
|
usleep(random_int(10, 1000) * 100000); |
|
|
$trueName = trim($userName); |
|
|
$trueName = trim($userName); |
|
|
|
|
|
|
|
|
$thisUserImageDir = $baseImageDir . $trueName . "/"; |
|
|
$thisUserImageDir = $baseImageDir . $trueName . "/"; |
|
|
@ -32,9 +39,12 @@ class Ins24Service |
|
|
for (; ;) { |
|
|
for (; ;) { |
|
|
try { |
|
|
try { |
|
|
$data = $this->queryImage($trueName, $pageNo, $maxId); |
|
|
$data = $this->queryImage($trueName, $pageNo, $maxId); |
|
|
|
|
|
if (count($data["resourceList"]) == 0) { |
|
|
|
|
|
break; |
|
|
|
|
|
} |
|
|
foreach ($data["resourceList"] as $resource) { |
|
|
foreach ($data["resourceList"] as $resource) { |
|
|
$res = $this->downloadFile($resource["mediaURL"], 0, $thisUserImageDir); |
|
|
$res = $this->downloadFile($resource["mediaURL"], 0, $thisUserImageDir); |
|
|
usleep(random_int(100, 1000) * 10000); |
|
|
|
|
|
|
|
|
usleep(random_int(10, 1000) * 100000); |
|
|
if ($res == 0) { |
|
|
if ($res == 0) { |
|
|
break 2; |
|
|
break 2; |
|
|
} |
|
|
} |
|
|
@ -57,7 +67,7 @@ class Ins24Service |
|
|
$curl = curl_init(); |
|
|
$curl = curl_init(); |
|
|
|
|
|
|
|
|
curl_setopt_array($curl, array( |
|
|
curl_setopt_array($curl, array( |
|
|
CURLOPT_URL => "https://www.ins246.com/ins/search.html", |
|
|
|
|
|
|
|
|
CURLOPT_URL => "http://www.ins246.com/ins/search.html", |
|
|
CURLOPT_RETURNTRANSFER => true, |
|
|
CURLOPT_RETURNTRANSFER => true, |
|
|
CURLOPT_ENCODING => "", |
|
|
CURLOPT_ENCODING => "", |
|
|
CURLOPT_MAXREDIRS => 10, |
|
|
CURLOPT_MAXREDIRS => 10, |
|
|
@ -75,7 +85,7 @@ class Ins24Service |
|
|
"x-requested-with: XMLHttpRequest", |
|
|
"x-requested-with: XMLHttpRequest", |
|
|
"user-agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.111 Safari/537.36", |
|
|
"user-agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.111 Safari/537.36", |
|
|
"content-type: application/x-www-form-urlencoded; charset=UTF-8", |
|
|
"content-type: application/x-www-form-urlencoded; charset=UTF-8", |
|
|
"origin: https://www.ins246.com", |
|
|
|
|
|
|
|
|
"origin: http://www.ins246.com", |
|
|
"sec-fetch-site: same-origin", |
|
|
"sec-fetch-site: same-origin", |
|
|
"sec-fetch-mode: cors", |
|
|
"sec-fetch-mode: cors", |
|
|
"sec-fetch-dest: empty", |
|
|
"sec-fetch-dest: empty", |
|
|
@ -86,11 +96,19 @@ class Ins24Service |
|
|
)); |
|
|
)); |
|
|
|
|
|
|
|
|
$response = curl_exec($curl); |
|
|
$response = curl_exec($curl); |
|
|
|
|
|
|
|
|
|
|
|
if(curl_exec($curl) === false) |
|
|
|
|
|
{ |
|
|
|
|
|
echo 'Curl error: ' . curl_error($curl); |
|
|
|
|
|
} else { |
|
|
|
|
|
echo $response; |
|
|
|
|
|
} |
|
|
curl_close($curl); |
|
|
curl_close($curl); |
|
|
$formatResponse = json_decode($response, true); |
|
|
$formatResponse = json_decode($response, true); |
|
|
if ($formatResponse["code"] == 200) { |
|
|
if ($formatResponse["code"] == 200) { |
|
|
return $formatResponse["data"]; |
|
|
return $formatResponse["data"]; |
|
|
|
|
|
} else { |
|
|
|
|
|
echo $response; |
|
|
|
|
|
throw new \Exception("出现异常"); |
|
|
} |
|
|
} |
|
|
// echo $response;
|
|
|
// echo $response;
|
|
|
|
|
|
|
|
|
@ -134,7 +152,7 @@ class Ins24Service |
|
|
curl_setopt($ch, CURLOPT_VERBOSE, 1); |
|
|
curl_setopt($ch, CURLOPT_VERBOSE, 1); |
|
|
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1); |
|
|
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1); |
|
|
curl_setopt($ch, CURLOPT_AUTOREFERER, false); |
|
|
curl_setopt($ch, CURLOPT_AUTOREFERER, false); |
|
|
curl_setopt($ch, CURLOPT_REFERER, "https://www.ins246.com/ins/gosearch.html"); |
|
|
|
|
|
|
|
|
curl_setopt($ch, CURLOPT_REFERER, "http://www.ins246.com/ins/gosearch.html"); |
|
|
curl_setopt($ch, CURLOPT_HTTP_VERSION, CURL_HTTP_VERSION_1_1); |
|
|
curl_setopt($ch, CURLOPT_HTTP_VERSION, CURL_HTTP_VERSION_1_1); |
|
|
curl_setopt($ch, CURLOPT_HEADER, 0); |
|
|
curl_setopt($ch, CURLOPT_HEADER, 0); |
|
|
$image = curl_exec($ch); |
|
|
$image = curl_exec($ch); |
|
|
|