queryImage($trueName, $pageNo, $maxId);
                        if (count($data["resourceList"]) == 0) {
                            break;
                        }
                        if ($flag && $data["maxId"] != "2263028000987470356_13049295449") {
                            echo "maxId not matched! \n";
                            $flag = false;
                            continue;
                        }
                        foreach ($data["resourceList"] as $resource) {
                            $res = $this->downloadFile($resource["mediaURL"], 0, $thisUserImageDir);
                            usleep(random_int(10, 1000) * 100000);
                            if ($res == 0) {
                                break;
                            }
                        }
                        $maxId = $data["maxId"];
                        $pageNo ++;
                    } catch (\Exception $e) {
                        Log::error("ins get user id for name error: " . $e->getMessage() . " username is " . $userName);
                    }
                }
            }
        } catch (\Exception $e) {
        }
    }

    /**
     * Downloads the videos of every account listed in user.txt.
     *
     * For each non-blank line of the list, the account's video pages are fetched
     * with queryVideo() and every "videoUrl" is handed to downloadFile(), which
     * stores it under <base dir>/<account>/. Paging for an account stops when a
     * page has no resources, when downloadFile() returns 0 (file already on
     * disk), or after several consecutive failed page requests.
     *
     * @return void
     */
    public function scrapeUsersVideos()
    {
        $baseImageDir = "/Users/shixuesen/OneDrive/Pictures/instagram/";
        // Give up on an account after this many failed page requests in a row.
        // Without a bound, a persistent error retries the same page forever.
        $maxConsecutiveFailures = 3;

        try {
            $userList = file("/Users/shixuesen/OneDrive/Pictures/instagram/user.txt");
            if ($userList === false) {
                Log::error("ins scrape videos error: unable to read the user list");
                return;
            }

            foreach ($userList as $userName) {
                $trueName = trim($userName);
                if ($trueName === "") {
                    // Blank line in the list: nothing to query.
                    continue;
                }

                // Random pause of 1-100 seconds (usleep takes microseconds).
                usleep(random_int(10, 1000) * 100000);

                $thisUserImageDir = $baseImageDir . $trueName . "/";
                $maxId = "";
                $pageNo = 1;
                $consecutiveFailures = 0;

                for (;;) {
                    try {
                        $data = $this->queryVideo($trueName, $pageNo, $maxId);
                        $consecutiveFailures = 0;

                        $resources = is_array($data) ? ($data["resourceList"] ?? []) : [];
                        if (!is_array($resources) || count($resources) == 0) {
                            break;
                        }

                        foreach ($resources as $resource) {
                            $res = $this->downloadFile($resource["videoUrl"] ?? "", 0, $thisUserImageDir);
                            usleep(random_int(10, 1000) * 100000);
                            if ($res == 0) {
                                // File already present: leave the paging loop
                                // and move on to the next account.
                                break 2;
                            }
                        }

                        $maxId = $data["maxId"] ?? "";
                        $pageNo++;
                    } catch (\Exception $e) {
                        Log::error("ins get user id for name error: " . $e->getMessage() . " username is " . $trueName);

                        $consecutiveFailures++;
                        if ($consecutiveFailures >= $maxConsecutiveFailures) {
                            break;
                        }
                        // Back off before retrying the same page.
                        usleep(random_int(10, 1000) * 100000);
                    }
                }
            }
        } catch (\Exception $e) {
            // Report instead of silently swallowing the failure.
            Log::error("ins scrape videos error: " . $e->getMessage());
        }
    }

    /**
     * Fetches one page of an account's images (search request with type=1).
     *
     * @param string $userName Instagram account name.
     * @param int    $pageNo   Page number sent as "pageNo".
     * @param string $maxId    Value sent as "maxId"; callers pass the "maxId" of the previous page.
     * @return mixed The "data" member of the decoded JSON response.
     * @throws \Exception When the request fails or the response code is not 200.
     */
    private function queryImage($userName, $pageNo = 1, $maxId = "")
    {
        return $this->requestInsSearch($userName, $pageNo, $maxId, 1);
    }

    /**
     * Fetches one page of an account's videos (search request with type=0).
     *
     * @param string $userName Instagram account name.
     * @param int    $pageNo   Page number sent as "pageNo".
     * @param string $maxId    Value sent as "maxId"; callers pass the "maxId" of the previous page.
     * @return mixed The "data" member of the decoded JSON response.
     * @throws \Exception When the request fails or the response code is not 200.
     */
    private function queryVideo($userName, $pageNo = 1, $maxId = "")
    {
        return $this->requestInsSearch($userName, $pageNo, $maxId, 0);
    }

    /**
     * Sends one POST to the ins246.com search endpoint and decodes the reply.
     *
     * The request is executed exactly once, the form fields are URL-encoded,
     * and transport or decoding problems surface as exceptions.
     *
     * @param string $userName Instagram account name.
     * @param int    $pageNo   Page number sent as "pageNo".
     * @param string $maxId    Value sent as "maxId".
     * @param int    $type     Value of the "type" form field (1 from queryImage, 0 from queryVideo).
     * @return mixed The "data" member of the decoded JSON response ([] when absent).
     * @throws \Exception When cURL fails, the body is not JSON, or "code" is not 200.
     */
    private function requestInsSearch($userName, $pageNo, $maxId, $type)
    {
        // http_build_query() drops null values, hence the string cast on maxId.
        $postFields = http_build_query(
            [
                "search" => "https://www.instagram.com/" . $userName . "/",
                "pageNo" => $pageNo,
                "type"   => $type,
                "maxId"  => (string) $maxId,
            ],
            "",
            "&"
        );

        $curl = curl_init();
        if ($curl === false) {
            throw new \Exception("Curl error: unable to initialise a cURL handle");
        }

        try {
            curl_setopt_array($curl, [
                CURLOPT_URL => "http://www.ins246.com/ins/search.html",
                CURLOPT_RETURNTRANSFER => true,
                CURLOPT_ENCODING => "",
                CURLOPT_MAXREDIRS => 10,
                // 0 means no overall timeout; kept from the original settings.
                CURLOPT_TIMEOUT => 0,
                CURLOPT_FOLLOWLOCATION => true,
                CURLOPT_HTTP_VERSION => CURL_HTTP_VERSION_1_1,
                CURLOPT_CUSTOMREQUEST => "POST",
                CURLOPT_POSTFIELDS => $postFields,
                CURLOPT_HTTPHEADER => [
                    "authority: www.ins246.com",
                    "pragma: no-cache",
                    "cache-control: no-cache",
                    "accept: */*",
                    "x-requested-with: XMLHttpRequest",
                    "user-agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.111 Safari/537.36",
                    "content-type: application/x-www-form-urlencoded; charset=UTF-8",
                    "origin: http://www.ins246.com",
                    "sec-fetch-site: same-origin",
                    "sec-fetch-mode: cors",
                    "sec-fetch-dest: empty",
                    "referer: https://www.ins246.com/ins/gosearch.html",
                    "accept-language: en-US,en;q=0.9,zh-TW;q=0.8,zh;q=0.7,ja;q=0.6,zh-CN;q=0.5",
                    "cookie: __cfduid=d94e7974e4bbec11a34d72efca94204591604573518; JSESSIONID=EF65EB47872FB6EFEFADA05C8CD140EA; _ga=GA1.2.343334980.1604573522; _gid=GA1.2.1674010257.1604573522; JSESSIONID=6A642D962A119268816A05B9B3D7C8A0",
                ],
            ]);

            // Execute once only; a second curl_exec() would resend the request.
            $response = curl_exec($curl);
            if ($response === false) {
                throw new \Exception("Curl error: " . curl_error($curl));
            }
        } finally {
            curl_close($curl);
        }

        echo $response;

        $formatResponse = json_decode($response, true);
        if (is_array($formatResponse) && isset($formatResponse["code"]) && $formatResponse["code"] == 200) {
            return $formatResponse["data"] ?? [];
        }

        throw new \Exception("出现异常");
    }

    /**
     * Downloads one URL to $filePrefix . <derived file name> unless it already exists.
     *
     * @param string $filenameUrl URL of the file to fetch.
     * @param int    $flag        Unused; kept so existing callers keep working.
     * @param string $filePrefix  Prepended to the derived file name (callers in
     *                            this file pass a directory ending in "/").
     * @return int 0 when the target (or a ".back" sibling) already exists,
     *             1 when the file was written, -1 on failure.
     */
    public function downloadFile($filenameUrl, $flag = 0, $filePrefix = "")
    {
        if (!is_string($filenameUrl) || $filenameUrl === "") {
            // An empty name would make file_exists() match the directory itself
            // and wrongly report "already downloaded".
            echo "download skipped: empty url\n";
            return -1;
        }

        // NOTE(review): pathinfo()'s 'filename' omits everything after the last
        // '.', so the extension only survives when the query string itself
        // contains a dot. Kept unchanged so names match files already on disk.
        $filePathInfo = pathinfo($filenameUrl);
        $filename = $filePathInfo['filename'];
        $pos = strpos($filename, "?");
        if ($pos > 0) {
            $filename = substr($filename, 0, $pos);
        }
        if ($filename === "") {
            echo "download skipped: no file name in " . $filenameUrl . "\n";
            $this->logFailUrl($filePrefix, $filenameUrl);
            return -1;
        }

        $targetPath = $filePrefix . $filename;
        if (file_exists($targetPath) || file_exists($targetPath . ".back")) {
            echo "\n file exists " . $targetPath;
            return 0;
        }

        $image = false;
        $httpStatus = 0;
        $curlError = "";
        try {
            $ch = curl_init();
            curl_setopt($ch, CURLOPT_URL, $filenameUrl);
            curl_setopt($ch, CURLOPT_VERBOSE, 1);
            curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
            curl_setopt($ch, CURLOPT_AUTOREFERER, false);
            curl_setopt($ch, CURLOPT_REFERER, "http://www.ins246.com/ins/gosearch.html");
            curl_setopt($ch, CURLOPT_HTTP_VERSION, CURL_HTTP_VERSION_1_1);
            curl_setopt($ch, CURLOPT_HEADER, 0);
            $image = curl_exec($ch);
            $httpStatus = (int) curl_getinfo($ch, CURLINFO_HTTP_CODE);
            $curlError = curl_error($ch);
            curl_close($ch);
        } catch (\Throwable $e) {
            var_dump($e->getMessage());
            $this->logFailUrl($filePrefix, $filenameUrl);
            return -1;
        }

        // Never write a failed transfer to disk: a zero-byte or error-page file
        // would be treated as "file exists" on the next run.
        if ($image === false || $image === "" || $httpStatus < 200 || $httpStatus >= 300) {
            echo "download failed (HTTP " . $httpStatus . ") " . $filenameUrl . " " . $curlError . "\n";
            $this->logFailUrl($filePrefix, $filenameUrl);
            return -1;
        }

        // Create the target directory on first use.
        $targetDir = dirname($targetPath);
        if (!is_dir($targetDir) && !mkdir($targetDir, 0777, true) && !is_dir($targetDir)) {
            echo "cannot create directory " . $targetDir . "\n";
            $this->logFailUrl($filePrefix, $filenameUrl);
            return -1;
        }

        echo "new filename: " . $targetPath . "\n";
        if (file_put_contents($targetPath, $image) === false) {
            $this->logFailUrl($filePrefix, $filenameUrl);
            return -1;
        }

        return 1;
    }
}