<?php
|
|
|
|
namespace App\Services;
|
|
use Illuminate\Support\Arr;
|
|
use Illuminate\Support\Facades\Log;
|
|
|
|
// Disable PHP's maximum execution time (0 means "no limit").
// NOTE(review): this runs as a side effect whenever this file is loaded, not only
// when WeiboService is used — presumably intended for long downloads; confirm.
set_time_limit(0);
|
|
// Remove the per-request memory cap (-1 means "unlimited").
// NOTE(review): file-load side effect; presumably needed because media files are
// read fully into memory before being written to disk — confirm.
ini_set('memory_limit','-1');
|
|
// Make every date/time function in this process use UTC as the default timezone.
// NOTE(review): file-load side effect that overrides the application's configured
// timezone for the rest of the request — confirm this is intended.
date_default_timezone_set('UTC');
|
|
|
|
/**
 * Saves the pictures and videos referenced by a Weibo timeline payload to disk.
 */
class WeiboService
{
    /**
     * Walks the cards of a Weibo timeline JSON payload and stores their media.
     *
     * For every card that carries an "mblog" entry exactly one source is handled,
     * in this order of precedence: the post's own "pics", the "pics" of its
     * "retweeted_status", or the video described by "page_info.media_info".
     * Exceptions raised while processing are logged and swallowed.
     *
     * @param string $content Raw JSON text; the cards are read from "data.cards".
     *
     * @return bool Always true.
     */
    public function scrapeWeiboPicAndVideo($content)
    {
        $video_dir = "/Volumes/Samsung/weibo/video";
        $image_dir = "/Volumes/Samsung/weibo/image";

        try {
            if (!is_string($content) || $content === '') {
                // NOTE(review): terminating the whole process from a service is harsh;
                // kept because existing callers may rely on the 'empty' response.
                die('empty');
            }

            $decoded_json = json_decode($content, true);
            if (!is_array($decoded_json)) {
                Log::error("Weibo payload is not a JSON object: " . json_last_error_msg());

                return true;
            }

            $cards = Arr::get($decoded_json, 'data.cards', []);
            if (!is_array($cards)) {
                $cards = [];
            }

            foreach ($cards as $card) {
                if (!is_array($card) || !array_key_exists("mblog", $card) || !is_array($card['mblog'])) {
                    continue;
                }

                $mblog = $card['mblog'];

                // Kept from the original implementation: each processed post is echoed as JSON.
                echo json_encode($mblog);

                if (array_key_exists("pics", $mblog)) {
                    $this->process_pic(
                        $mblog['pics'],
                        $image_dir,
                        Arr::get($mblog, "user", []),
                        Arr::get($mblog, "text", '')
                    );
                } elseif (array_key_exists("retweeted_status", $mblog)) {
                    $retweeted = $mblog["retweeted_status"];

                    if (is_array($retweeted) && array_key_exists("pics", $retweeted)) {
                        $this->process_pic(
                            $retweeted['pics'],
                            $image_dir,
                            Arr::get($retweeted, "user", []),
                            Arr::get($retweeted, "text", '')
                        );
                    }
                } elseif (
                    array_key_exists("page_info", $mblog)
                    && is_array($mblog["page_info"])
                    && array_key_exists("media_info", $mblog["page_info"])
                ) {
                    $page_info = $mblog["page_info"];
                    $video_url = $this->pickVideoUrl($page_info["media_info"]);

                    if ($video_url !== "") {
                        // "content2" is used as the human-readable part of the file name.
                        $this->process_video($video_url, $video_dir, Arr::get($page_info, "content2", ""));
                    }
                }
            }
        } catch (\Exception $e) {
            Log::error($e);
            Log::error($e->getTraceAsString());
        }

        return true;
    }

    /**
     * Returns the current Unix timestamp with microsecond precision.
     *
     * @return float Seconds since the Unix epoch.
     */
    public function microtime_float()
    {
        return microtime(true);
    }

    /**
     * Downloads every picture of a post into $subdir unless it is already there.
     *
     * Pictures whose height/width ratio exceeds 15 are skipped. The ratio is
     * evaluated per picture; a picture without usable geometry is never skipped.
     * Files are named "<screen_name>--<remote file name>.<ext>" (no prefix when
     * the user has no screen name).
     *
     * @param array  $pics   Picture entries; each has "url"/"geo", optionally nested under "large".
     * @param string $subdir Target directory; created (recursively) when missing.
     * @param array  $user   Author data; only "screen_name" is read.
     * @param string $text   Post text; currently unused, kept for interface compatibility.
     *
     * @return void
     *
     * @throws \RuntimeException When the target directory cannot be created.
     */
    public function process_pic($pics, $subdir, $user, $text)
    {
        $this->ensureDirectory($subdir);

        if (!is_array($pics)) {
            return;
        }

        // Loop-invariant: the same author prefix applies to every picture of the post.
        $user_name = Arr::get($user, "screen_name", '');
        $prefix = (is_scalar($user_name) && (string) $user_name !== '') ? $user_name . '--' : '';

        foreach ($pics as $pic) {
            if (!is_array($pic)) {
                continue;
            }

            // Prefer the "large" rendition when the entry provides one.
            $source = array_key_exists("large", $pic) ? $pic['large'] : $pic;
            $pic_url = Arr::get($source, 'url', '');
            if (!is_string($pic_url) || $pic_url === '') {
                continue;
            }

            if ($this->heightToWidthRatio(Arr::get($source, 'geo')) > 15) {
                continue;
            }

            // Derive the name from the URL path only, so a query string never ends up in the extension.
            $url_path = parse_url($pic_url, PHP_URL_PATH);
            if (!is_string($url_path) || $url_path === '') {
                $url_path = $pic_url;
            }

            $pic_name = $prefix . pathinfo($url_path, PATHINFO_FILENAME);
            $pic_ext = pathinfo($url_path, PATHINFO_EXTENSION);
            $file_name = $subdir . "/" . $pic_name . "." . $pic_ext;

            if (file_exists($file_name)) {
                continue;
            }

            $pic_content = file_get_contents($pic_url);
            if ($pic_content === false) {
                // Do not write an empty file: it would block every later retry.
                Log::error("Failed to download picture: " . $pic_url);
                continue;
            }

            file_put_contents($file_name, $pic_content);
        }
    }

    /**
     * Downloads a video into $subdir unless it is already there.
     *
     * The file is named "<video_name>--<name derived from the URL path>", where
     * any URL inside $video_name is stripped first. When the URL carries an
     * "Expires" query parameter that lies in the past, the error is logged and
     * the request is aborted with a 404; without that parameter no expiry check
     * is made.
     *
     * @param string $video_url  Direct URL of the video file.
     * @param string $subdir     Target directory; created (recursively) when missing.
     * @param string $video_name Human-readable title used as file name prefix.
     *
     * @return void
     *
     * @throws \RuntimeException When the target directory cannot be created.
     */
    public function process_video($video_url, $subdir, $video_name)
    {
        Log::info("video_url: " . $video_url);

        $this->ensureDirectory($subdir);

        $url_params = parse_url($video_url);
        if (!is_array($url_params) || !isset($url_params["path"])) {
            Log::error("Cannot derive a file name from video url: " . $video_url);

            return;
        }

        $params = [];
        if (isset($url_params["query"])) {
            parse_str($url_params["query"], $params);
        }

        // Flatten the URL path into a single file name component.
        $video_origin_name = substr($url_params["path"], 1);
        $video_origin_name = str_replace("stream/", "--", $video_origin_name);
        $video_origin_name = str_replace("/", "", $video_origin_name);

        $video_name = (string) preg_replace("/(http|https|ftp)(.)*([a-z0-9\-\.\_])+/i", "", (string) $video_name);
        // A slash in the title would otherwise be interpreted as a directory separator.
        $video_name = str_replace("/", "_", $video_name);

        // time() is the real current Unix timestamp; formatting it with a 12-hour
        // clock and parsing it back (as done previously) was off by up to 12 hours.
        $now = time();
        $expires = Arr::get($params, 'Expires');

        if (is_numeric($expires) && $now > (int) $expires) {
            Log::error("视频有效期已过,now is " . $now .", Expires is ". $expires);
            abort(404);
        }

        $file_name = $subdir . "/" . $video_name . "--" . $video_origin_name;

        if (file_exists($file_name)) {
            return;
        }

        $video_content = file_get_contents($video_url);
        if ($video_content === false) {
            // Do not write an empty file: it would block every later retry.
            Log::error("Failed to download video: " . $video_url);

            return;
        }

        file_put_contents($file_name, $video_content);
    }

    /**
     * Returns the first non-empty video URL, preferring the highest quality.
     *
     * @param mixed $media_info The "media_info" entry of a post's "page_info".
     *
     * @return string The chosen URL, or "" when none is available.
     */
    private function pickVideoUrl($media_info)
    {
        foreach (["mp4_720p_mp4", "mp4_hd_url", "mp4_sd_url"] as $key) {
            $candidate = Arr::get($media_info, $key, "");

            if (is_string($candidate) && $candidate !== "") {
                return $candidate;
            }
        }

        return "";
    }

    /**
     * Computes height / width from a "geo" entry, or 0 when it is unusable.
     *
     * @param mixed $geo Expected to be an array with numeric "width" and "height".
     *
     * @return int|float The ratio, or 0 for missing, non-numeric or zero-width geometry.
     */
    private function heightToWidthRatio($geo)
    {
        if (!is_array($geo)) {
            return 0;
        }

        $width = Arr::get($geo, 'width');
        $height = Arr::get($geo, 'height');

        if (!is_numeric($width) || !is_numeric($height) || $width <= 0) {
            return 0;
        }

        return $height / $width;
    }

    /**
     * Creates $dir, including missing parent directories, when it does not exist.
     *
     * @param string $dir Directory path.
     *
     * @return void
     *
     * @throws \RuntimeException When the directory cannot be created.
     */
    private function ensureDirectory($dir)
    {
        // The trailing is_dir() tolerates another process creating the directory concurrently.
        if (!is_dir($dir) && !mkdir($dir, 0777, true) && !is_dir($dir)) {
            throw new \RuntimeException("Unable to create directory: " . $dir);
        }
    }
}
|