You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 

272 lines
12 KiB

<?php
namespace App\Services;
use Illuminate\Support\Arr;
use Illuminate\Support\Facades\Log;
// Process-wide runtime settings, applied as a side effect of loading this file.
date_default_timezone_set('UTC'); // date/time functions in this process use UTC
ini_set('memory_limit', '-1');    // -1 removes PHP's memory cap
set_time_limit(0);                // 0 disables the execution time limit
/**
 * Saves the images and videos referenced by decoded Weibo JSON payloads to disk.
 *
 * Two payload layouts are handled: one whose posts live under `data.cards[*].mblog`
 * and one whose posts are listed directly under `data.statuses`. For every post
 * exactly one of the following is done, in this order of precedence:
 *   1. the post has `pics`                    -> its pictures are downloaded;
 *   2. the post has `retweeted_status`        -> the retweeted post's pictures (if any);
 *   3. the post has `page_info.media_info`    -> the best available video is downloaded.
 */
class WeiboService
{
    /** Directory downloaded videos are written to. */
    const VIDEO_DIR = "/Volumes/Samsung/weibo/video";

    /** Directory downloaded images are written to. */
    const IMAGE_DIR = "/Volumes/Samsung/weibo/image";

    /**
     * Relative directory that is created for every processed post.
     * NOTE(review): nothing in this class writes into it; it is only kept so the
     * directory keeps being created. Confirm whether anything still relies on it.
     */
    const SCRATCH_DIR = "data/video";

    /**
     * Processes a payload whose posts are wrapped in cards (`data.cards[*].mblog`).
     *
     * Cards without an `mblog` entry are skipped. Exceptions raised while
     * processing are logged and swallowed, which ends the batch early.
     *
     * @param string $content Raw JSON text. An empty value terminates the script with "empty".
     * @return bool Always true.
     */
    public function scrapeWeiboPicAndVideo($content)
    {
        try {
            foreach (self::decodePosts($content, 'data.cards') as $card) {
                if (!is_array($card) || !array_key_exists("mblog", $card)) {
                    continue;
                }
                // NOTE(review): this feed reads its links from page_info.urls while the
                // group feed reads page_info.media_info with a different key set; confirm
                // that "mp4_hd_url" really is a key of "urls".
                $this->processPost($card['mblog'], 'urls', ['mp4_720p_mp4', 'mp4_hd_url', 'mp4_ld_mp4']);
            }
        } catch (\Exception $e) {
            Log::error($e);
            Log::error($e->getTraceAsString());
        }

        return true;
    }

    /**
     * Processes a payload whose posts are listed directly under `data.statuses`.
     *
     * Exceptions raised while processing are logged and swallowed, which ends
     * the batch early.
     *
     * @param string $content Raw JSON text. An empty value terminates the script with "empty".
     * @return bool Always true.
     */
    public function scrapeGroupWeiboPicAndVideo($content)
    {
        try {
            foreach (self::decodePosts($content, 'data.statuses') as $status) {
                $this->processPost($status, 'media_info', ['mp4_720p_mp4', 'mp4_hd_url', 'mp4_sd_url']);
            }
        } catch (\Exception $e) {
            Log::error($e);
            Log::error($e->getTraceAsString());
        }

        return true;
    }

    /**
     * Returns the current Unix timestamp with microsecond precision.
     *
     * @return float Seconds since the Unix epoch.
     */
    public function microtime_float()
    {
        return microtime(true);
    }

    /**
     * Downloads every picture of a post into $subdir.
     *
     * For each picture the `large` rendition is preferred when present. Pictures
     * whose height/width ratio exceeds 15 are skipped, as are pictures whose
     * target file already exists. Files are named "<screen_name>--<basename>.<ext>"
     * (without the prefix when the user has no screen name).
     *
     * @param array  $pics   Picture entries; each provides `url` and `geo`, optionally nested under `large`.
     * @param string $subdir Target directory; created (recursively) when missing.
     * @param array  $user   Post author; only `screen_name` is read.
     * @param string $text   Post text. Currently unused.
     * @return void
     * @throws \RuntimeException When the target directory cannot be created.
     */
    public function process_pic($pics, $subdir, $user, $text)
    {
        self::ensureDirectory($subdir);
        if (!is_array($pics)) {
            return;
        }

        // The author is the same for every picture, so build the prefix once.
        // "/" is replaced so the name cannot point into another directory.
        $screen_name = Arr::get($user, "screen_name", '');
        $prefix = (is_scalar($screen_name) && (string) $screen_name !== '')
            ? str_replace('/', '_', (string) $screen_name) . '--'
            : '';

        foreach ($pics as $pic) {
            if (!is_array($pic)) {
                continue;
            }

            $variant = (array_key_exists("large", $pic) && is_array($pic["large"])) ? $pic["large"] : $pic;

            $pic_url = Arr::get($variant, 'url', '');
            if (!is_string($pic_url) || $pic_url === '') {
                continue;
            }

            // The ratio is evaluated per picture; one without usable geometry
            // counts as 0 and is therefore never filtered out.
            if (self::heightToWidthRatio(Arr::get($variant, 'geo')) > 15) {
                continue;
            }

            // Name the file after the URL path only, so a query string cannot
            // leak into the extension.
            $path = parse_url($pic_url, PHP_URL_PATH);
            if (!is_string($path) || $path === '') {
                $path = $pic_url;
            }
            $pic_name = $prefix . pathinfo($path, PATHINFO_FILENAME);
            $pic_ext = pathinfo($path, PATHINFO_EXTENSION);
            $file_name = $subdir . "/" . $pic_name . "." . $pic_ext;

            if (file_exists($file_name)) {
                continue;
            }

            $pic_content = file_get_contents($pic_url);
            if ($pic_content === false) {
                // Writing nothing keeps the picture eligible for a later retry;
                // an empty placeholder file would block it via the check above.
                Log::warning("Failed to download image: " . $pic_url);
                continue;
            }
            file_put_contents($file_name, $pic_content);
        }
    }

    /**
     * Downloads a video into $subdir unless the target file already exists.
     *
     * The file is named "<title>--<url path>", where the title has URL-like
     * text removed and the URL path has "stream/" replaced by "--" and all
     * remaining slashes removed. When the URL carries a numeric `Expires`
     * query parameter that lies in the past, the request is aborted with 404;
     * links without that parameter are downloaded without an expiry check.
     *
     * @param string $video_url  Direct link to the video.
     * @param string $subdir     Target directory; created (recursively) when missing.
     * @param string $video_name Human readable title used as file name prefix.
     * @return void
     * @throws \RuntimeException When the target directory cannot be created.
     */
    public function process_video($video_url, $subdir, $video_name)
    {
        Log::info("video_url: " . $video_url);
        self::ensureDirectory($subdir);

        $url_params = parse_url($video_url);
        if (!is_array($url_params) || !isset($url_params["path"])) {
            Log::error("Unparseable video URL: " . $video_url);
            return;
        }

        $params = [];
        parse_str(isset($url_params["query"]) ? $url_params["query"] : '', $params);

        // time() is the real current epoch; formatting the date with a 12-hour
        // clock and parsing it back would be 12 hours off every afternoon.
        $now = time();
        if (isset($params['Expires']) && is_numeric($params['Expires']) && $now > $params['Expires']) {
            Log::error("视频有效期已过,now is " . $now .", Expires is ". $params['Expires']);
            abort(404);
        }

        $video_origin_name = substr($url_params["path"], 1);
        $video_origin_name = str_replace("stream/", "--", $video_origin_name);
        $video_origin_name = str_replace("/", "", $video_origin_name);

        $video_name = is_scalar($video_name) ? (string) $video_name : '';
        $video_name = (string) preg_replace("/(http|https|ftp)(.)*([a-z0-9\-\.\_])+/i", "", $video_name);
        // A slash in the title would be taken as a directory separator.
        $video_name = str_replace("/", "_", $video_name);

        $file_name = $subdir . "/" . $video_name . "--" . $video_origin_name;
        if (file_exists($file_name)) {
            return;
        }

        $video_content = file_get_contents($video_url);
        if ($video_content === false) {
            // Do not leave an empty file behind; it would prevent any retry.
            Log::warning("Failed to download video: " . $video_url);
            return;
        }
        file_put_contents($file_name, $video_content);
    }

    /**
     * Decodes the payload and returns the list stored at $path (dot notation).
     *
     * @param string $content Raw JSON text.
     * @param string $path    Location of the post list, e.g. "data.cards".
     * @return array The list, or an empty array when it is missing or the JSON is invalid.
     */
    private static function decodePosts($content, $path)
    {
        if ($content === null || strlen($content) === 0) {
            // NOTE(review): terminating the script from a service is heavy-handed;
            // kept because callers may rely on the "empty" response.
            die('empty');
        }

        $posts = Arr::get(json_decode($content, true), $path);
        if (!is_array($posts)) {
            Log::warning("Weibo payload contains no list at '" . $path . "'");
            return [];
        }

        return $posts;
    }

    /**
     * Handles one post: its pictures, else the retweeted post's pictures, else its video.
     *
     * @param mixed    $mblog       Decoded post; anything but an array is ignored.
     * @param string   $videoSource Key inside `page_info` holding the map of video links.
     * @param string[] $videoKeys   Keys of that map, in order of preference.
     * @return void
     */
    private function processPost($mblog, $videoSource, array $videoKeys)
    {
        if (!is_array($mblog)) {
            return;
        }
        self::ensureDirectory(self::SCRATCH_DIR);

        if (array_key_exists("pics", $mblog)) {
            $this->process_pic(
                $mblog['pics'],
                self::IMAGE_DIR,
                Arr::get($mblog, "user", []),
                Arr::get($mblog, "text", '')
            );
            return;
        }

        if (array_key_exists("retweeted_status", $mblog)) {
            $retweeted = $mblog["retweeted_status"];
            if (is_array($retweeted) && array_key_exists("pics", $retweeted)) {
                $this->process_pic(
                    $retweeted['pics'],
                    self::IMAGE_DIR,
                    Arr::get($retweeted, "user", []),
                    Arr::get($retweeted, "text", '')
                );
            }
            return;
        }

        $page_info = Arr::get($mblog, "page_info");
        if (!is_array($page_info) || !array_key_exists("media_info", $page_info)) {
            return;
        }

        $video_url = self::firstVideoUrl(Arr::get($page_info, $videoSource), $videoKeys);
        if ($video_url !== "") {
            $this->process_video($video_url, self::VIDEO_DIR, Arr::get($page_info, "content2", ''));
        }
    }

    /**
     * Returns the first non-empty string found in $links under one of $keys.
     *
     * @param mixed    $links Map of link name to URL; anything else yields "".
     * @param string[] $keys  Link names in order of preference.
     * @return string The chosen URL, or "" when none is available.
     */
    private static function firstVideoUrl($links, array $keys)
    {
        foreach ($keys as $key) {
            $url = Arr::get($links, $key, "");
            if (is_string($url) && $url !== "") {
                return $url;
            }
        }

        return "";
    }

    /**
     * Computes height / width for a picture geometry.
     *
     * @param mixed $geo Geometry with `height` and `width`; may be a non-array when unknown.
     * @return float|int The ratio, or 0 when it cannot be computed (including a zero width).
     */
    private static function heightToWidthRatio($geo)
    {
        if (!is_array($geo)) {
            return 0;
        }

        $height = Arr::get($geo, 'height', 0);
        $width = Arr::get($geo, 'width', 0);
        if (!is_numeric($height) || !is_numeric($width) || $width == 0) {
            return 0;
        }

        return $height / $width;
    }

    /**
     * Creates $dir, including missing parents, unless the path already exists.
     *
     * @param string $dir Directory path.
     * @return void
     * @throws \RuntimeException When the directory cannot be created.
     */
    private static function ensureDirectory($dir)
    {
        if (file_exists($dir)) {
            return;
        }

        // The second is_dir() check tolerates another process creating the
        // directory between the existence test and mkdir().
        if (!mkdir($dir, 0777, true) && !is_dir($dir)) {
            throw new \RuntimeException('Unable to create directory: ' . $dir);
        }
    }
}