mirror of
https://github.com/starr-dusT/dotfiles.git
synced 2025-02-18 10:47:31 -08:00
switch back to upstream rss-bridge
This commit is contained in:
parent
fd760efad6
commit
c73e8a5e94
@ -1,128 +0,0 @@
|
||||
# Mostly a copy of https://github.com/NixOS/nixpkgs/blob/nixos-unstable/nixos/modules/services/web-apps/rss-bridge.nix
|
||||
# Since I'm using a custom derivation of rss-bridge I must define my own services module.
|
||||
|
||||
{ config, lib, pkgs, ... }:

with lib;

let
  # Values the user assigned to this module's options.
  cfg = config.my-services.rss-bridge;

  # Name of the php-fpm pool this module creates when `cfg.pool` keeps its default.
  defaultPool = "rss-bridge";

  # Store file containing the whitelisted bridge names, one per line.
  whitelistFile = pkgs.writeText "rss-bridge_whitelist.txt"
    (concatStringsSep "\n" cfg.whitelist);
in
{
  options.my-services.rss-bridge = {
    enable = mkEnableOption (lib.mdDoc "rss-bridge");

    user = mkOption {
      type = types.str;
      default = "nginx";
      description = lib.mdDoc ''
        User account under which both the service and the web-application run.
      '';
    };

    group = mkOption {
      type = types.str;
      default = "nginx";
      description = lib.mdDoc ''
        Group under which the web-application run.
      '';
    };

    pool = mkOption {
      type = types.str;
      default = defaultPool;
      description = lib.mdDoc ''
        Name of existing phpfpm pool that is used to run web-application.
        If not specified a pool will be created automatically with
        default values.
      '';
    };

    dataDir = mkOption {
      type = types.str;
      default = "/var/lib/rss-bridge";
      description = lib.mdDoc ''
        Location in which cache directory will be created.
        You can put `config.ini.php` in here.
      '';
    };

    virtualHost = mkOption {
      type = types.nullOr types.str;
      default = "rss-bridge";
      description = lib.mdDoc ''
        Name of the nginx virtualhost to use and setup. If null, do not setup any virtualhost.
      '';
    };

    whitelist = mkOption {
      type = types.listOf types.str;
      default = [];
      example = literalExpression ''
        [
          "Facebook"
          "Instagram"
          "Twitter"
        ]
      '';
      description = lib.mdDoc ''
        List of bridges to be whitelisted.
        If the list is empty, rss-bridge will use whitelist.default.txt.
        Use `[ "*" ]` to whitelist all.
      '';
    };
  };

  config = mkIf cfg.enable {
    # Only define a pool when the user did not point `pool` at one of their own.
    services.phpfpm.pools = mkIf (cfg.pool == defaultPool) {
      ${defaultPool} = {
        inherit (cfg) user;
        # Every setting is wrapped in mkDefault so it can be overridden elsewhere.
        settings = mapAttrs (_: mkDefault) {
          "listen.owner" = cfg.user;
          "listen.group" = cfg.user;
          "listen.mode" = "0600";
          "pm" = "dynamic";
          "pm.max_children" = 75;
          "pm.start_servers" = 10;
          "pm.min_spare_servers" = 5;
          "pm.max_spare_servers" = 20;
          "pm.max_requests" = 500;
          "catch_workers_output" = 1;
        };
      };
    };

    systemd.tmpfiles.rules = [
      # Cache directory owned by the service user/group.
      "d '${cfg.dataDir}/cache' 0750 ${cfg.user} ${cfg.group} - -"
      # Symlink the generated whitelist into the data directory, but only when one was configured.
      (mkIf (cfg.whitelist != []) "L+ ${cfg.dataDir}/whitelist.txt - - - - ${whitelistFile}")
      # Adjust mode/ownership of a user-provided config.ini.php.
      "z '${cfg.dataDir}/config.ini.php' 0750 ${cfg.user} ${cfg.group} - -"
    ];

    services.nginx = mkIf (cfg.virtualHost != null) {
      enable = true;
      virtualHosts = {
        ${cfg.virtualHost} = {
          root = "${pkgs.rss-bridge}";

          locations = {
            # Serve static files directly, route everything else through index.php.
            "/" = {
              tryFiles = "$uri /index.php$is_args$args";
            };

            # Hand index.php to the php-fpm pool and tell it where the data directory is.
            "~ ^/index.php(/|$)" = {
              extraConfig = ''
                include ${config.services.nginx.package}/conf/fastcgi_params;
                fastcgi_split_path_info ^(.+\.php)(/.+)$;
                fastcgi_pass unix:${config.services.phpfpm.pools.${cfg.pool}.socket};
                fastcgi_param SCRIPT_FILENAME $document_root$fastcgi_script_name;
                fastcgi_param RSSBRIDGE_DATA ${cfg.dataDir};
              '';
            };
          };
        };
      };
    };
  };
}
|
@ -3,10 +3,6 @@ let
|
||||
domain = "rssbridge.tstarr.us";
|
||||
in
|
||||
{
|
||||
imports = [
|
||||
./rss-bridge.nix
|
||||
];
|
||||
|
||||
services.postgresql = {
|
||||
enable = true;
|
||||
authentication = pkgs.lib.mkOverride 10 ''
|
||||
@ -32,7 +28,7 @@ in
|
||||
'';
|
||||
};
|
||||
|
||||
my-services.rss-bridge = {
|
||||
services.rss-bridge = {
|
||||
enable = true;
|
||||
whitelist = [ "*" ];
|
||||
virtualHost = "${domain}";
|
||||
|
@ -5,5 +5,4 @@ final: prev: {
|
||||
sway-scratchpad = final.callPackage ../pkgs/sway-scratchpad.nix {};
|
||||
advcpmv = final.callPackage ../pkgs/advcpmv.nix {};
|
||||
taskopen = final.callPackage ../pkgs/taskopen.nix {};
|
||||
rss-bridge = final.callPackage ../pkgs/rss-bridge {};
|
||||
}
|
||||
|
@ -1,31 +0,0 @@
|
||||
{ stdenv, lib, fetchFromGitHub }:

# RSS-Bridge built from a pinned upstream commit with two local patches applied.
# The result is simply the patched source tree copied into $out so that a web
# server can use the store path as its document root.
stdenv.mkDerivation rec {
  pname = "rss-bridge";
  # Upstream commit hash; it doubles as the fetch revision below.
  version = "b037d1b4d1f0b0f422e21125ddef00a58e185ed1";

  src = fetchFromGitHub {
    owner = "RSS-Bridge";
    repo = "rss-bridge";
    rev = version;
    sha256 = "sha256-zyWnjSYE2NFK/OJLnsFsE5oEyf+yrJe8TT6MH4roBwU=";
  };

  patches = [
    ./paths.patch
    ./youtube_bridge.patch
  ];

  # A custom installPhase replaces the default one, so the standard hooks must be
  # invoked explicitly; otherwise preInstall/postInstall overrides are silently ignored.
  installPhase = ''
    runHook preInstall

    mkdir -p "$out"
    cp -R ./* "$out"

    runHook postInstall
  '';

  meta = with lib; {
    description = "The RSS feed for websites missing it";
    homepage = "https://github.com/RSS-Bridge/rss-bridge";
    license = licenses.unlicense;
    maintainers = with maintainers; [ starr-dusT ];
    platforms = platforms.all;
  };
}
|
@ -1,78 +0,0 @@
|
||||
diff --git a/index.php b/index.php
|
||||
index 123f6ecd..69071aa2 100644
|
||||
--- a/index.php
|
||||
+++ b/index.php
|
||||
@@ -8,8 +8,8 @@ require_once __DIR__ . '/lib/bootstrap.php';
|
||||
|
||||
Configuration::verifyInstallation();
|
||||
$customConfig = [];
|
||||
-if (file_exists(__DIR__ . '/config.ini.php')) {
|
||||
- $customConfig = parse_ini_file(__DIR__ . '/config.ini.php', true, INI_SCANNER_TYPED);
|
||||
+if (file_exists(getenv('RSSBRIDGE_DATA') . '/config.ini.php')) {
|
||||
+ $customConfig = parse_ini_file(getenv('RSSBRIDGE_DATA') . '/config.ini.php', true, INI_SCANNER_TYPED);
|
||||
}
|
||||
Configuration::loadConfiguration($customConfig, getenv());
|
||||
|
||||
diff --git a/lib/BridgeFactory.php b/lib/BridgeFactory.php
|
||||
index ad433287..195c7af1 100644
|
||||
--- a/lib/BridgeFactory.php
|
||||
+++ b/lib/BridgeFactory.php
|
||||
@@ -13,12 +13,18 @@ final class BridgeFactory
|
||||
$this->cache = RssBridge::getCache();
|
||||
$this->logger = RssBridge::getLogger();
|
||||
|
||||
- // Create all possible bridge class names from fs
|
||||
+ // Create all possible bridge class names from original fs
|
||||
foreach (scandir(__DIR__ . '/../bridges/') as $file) {
|
||||
if (preg_match('/^([^.]+Bridge)\.php$/U', $file, $m)) {
|
||||
$this->bridgeClassNames[] = $m[1];
|
||||
}
|
||||
}
|
||||
+ // Create all possible bridge class names from additional fs
|
||||
+ foreach (scandir(PATH_BRIDGES) as $file) {
|
||||
+ if (preg_match('/^([^.]+Bridge)\.php$/U', $file, $m)) {
|
||||
+ $this->bridgeClassNames[] = $m[1];
|
||||
+ }
|
||||
+ }
|
||||
|
||||
$enabledBridges = Configuration::getConfig('system', 'enabled_bridges');
|
||||
if ($enabledBridges === null) {
|
||||
diff --git a/lib/Configuration.php b/lib/Configuration.php
|
||||
index c6fed0fd..672a5699 100644
|
||||
--- a/lib/Configuration.php
|
||||
+++ b/lib/Configuration.php
|
||||
@@ -92,8 +92,8 @@ final class Configuration
|
||||
}
|
||||
}
|
||||
|
||||
- if (file_exists(__DIR__ . '/../whitelist.txt')) {
|
||||
- $enabledBridges = trim(file_get_contents(__DIR__ . '/../whitelist.txt'));
|
||||
+ if (file_exists(getenv('RSSBRIDGE_DATA') . '/whitelist.txt')) {
|
||||
+ $enabledBridges = trim(file_get_contents(getenv('RSSBRIDGE_DATA') . '/whitelist.txt'));
|
||||
if ($enabledBridges === '*') {
|
||||
self::setConfig('system', 'enabled_bridges', ['*']);
|
||||
} else {
|
||||
diff --git a/lib/bootstrap.php b/lib/bootstrap.php
|
||||
index a95de9dd..e8ed317f 100644
|
||||
--- a/lib/bootstrap.php
|
||||
+++ b/lib/bootstrap.php
|
||||
@@ -7,7 +7,10 @@ const PATH_LIB_FORMATS = __DIR__ . '/../formats/';
|
||||
const PATH_LIB_CACHES = __DIR__ . '/../caches/';
|
||||
|
||||
/** Path to the cache folder */
|
||||
-const PATH_CACHE = __DIR__ . '/../cache/';
|
||||
+define('PATH_CACHE', getenv('RSSBRIDGE_DATA') . '/cache/');
|
||||
+
|
||||
+/** Path to extra bridge files */
|
||||
+define('PATH_BRIDGES', getenv('RSSBRIDGE_DATA') . '/bridges/');
|
||||
|
||||
/** URL to the RSS-Bridge repository */
|
||||
const REPOSITORY = 'https://github.com/RSS-Bridge/rss-bridge/';
|
||||
@@ -41,6 +44,7 @@ spl_autoload_register(function ($className) {
|
||||
__DIR__ . '/../caches/',
|
||||
__DIR__ . '/../formats/',
|
||||
__DIR__ . '/../lib/',
|
||||
+ PATH_BRIDGES,
|
||||
];
|
||||
foreach ($folders as $folder) {
|
||||
$file = $folder . $className . '.php';
|
@ -1,567 +0,0 @@
|
||||
diff --git a/bridges/CustomYoutubeBridge.php b/bridges/CustomYoutubeBridge.php
|
||||
new file mode 100644
|
||||
index 00000000..d04c6ac0
|
||||
--- /dev/null
|
||||
+++ b/bridges/CustomYoutubeBridge.php
|
||||
@@ -0,0 +1,561 @@
|
||||
<?php

/**
 * Custom YouTube bridge, adapted from
 * https://github.com/RSS-Bridge/rss-bridge/blob/master/bridges/YoutubeBridge.php
 *
 * Returns the newest videos of a user, channel, custom-named channel, playlist or search,
 * and appends each video's duration to its title.
 *
 * WARNING: to parse big playlists (over ~90 videos), you need to edit simple_html_dom.php:
 * change: define('MAX_FILE_SIZE', 600000);
 * into:   define('MAX_FILE_SIZE', 900000); (or more)
 */
class CustomYoutubeBridge extends BridgeAbstract
{
    const NAME = 'Custom YouTube Bridge';
    const URI = 'https://www.youtube.com';
    const CACHE_TIMEOUT = 60 * 60 * 3;
    const DESCRIPTION = 'Returns the 10 newest videos by username/channel/playlist or search';

    const PARAMETERS = [
        'By username' => [
            'u' => [
                'name' => 'username',
                'exampleValue' => 'LinusTechTips',
                'required' => true
            ]
        ],
        'By channel id' => [
            'c' => [
                'name' => 'channel id',
                'exampleValue' => 'UCw38-8_Ibv_L6hlKChHO9dQ',
                'required' => true
            ]
        ],
        'By custom name' => [
            'custom' => [
                'name' => 'custom name',
                'exampleValue' => 'LinusTechTips',
                'required' => true
            ]
        ],
        'By playlist Id' => [
            'p' => [
                'name' => 'playlist id',
                'exampleValue' => 'PL8mG-RkN2uTzJc8N0EoyhdC54prvBBLpj',
                'required' => true
            ]
        ],
        'Search result' => [
            's' => [
                'name' => 'search keyword',
                'exampleValue' => 'LinusTechTips',
                'required' => true
            ],
            'pa' => [
                'name' => 'page',
                'type' => 'number',
                'title' => 'This option is not work anymore, as YouTube will always return the same page',
                'exampleValue' => 1
            ]
        ],
        'global' => [
            'duration_min' => [
                'name' => 'min. duration (minutes)',
                'type' => 'number',
                'title' => 'Minimum duration for the video in minutes',
                'exampleValue' => 5
            ],
            'duration_max' => [
                'name' => 'max. duration (minutes)',
                'type' => 'number',
                'title' => 'Maximum duration for the video in minutes',
                'exampleValue' => 10
            ]
        ]
    ];

    private $feedName = '';
    private $feeduri = '';
    private $feedIconUrl = '';

    // Taken from the BetterVideoRss repository by VerifiedJoseph.
    // The port quantifier used to read "{2 ,4}"; the stray space stopped it from being a
    // quantifier at all, so URLs were cut off at the port. Ports may have up to 5 digits.
    const URI_REGEX = '/(https?:\/\/(?:www\.)?(?:[a-zA-Z0-9-.]{2,256}\.[a-z]{2,20})(\:[0-9]{2,5})?(?:\/[a-zA-Z0-9@:%_\+.,~#"\'!?&\/\/=\-*]+|\/)?)/ims'; //phpcs:ignore

    /**
     * Entry point: collects the feed items, backing off for 16 minutes once YouTube
     * answered with HTTP 429.
     *
     * @throws HttpException when rate limited (code 429) or when a fetch fails
     */
    public function collectData()
    {
        $cacheKey = 'youtube_rate_limit';
        if ($this->cache->get($cacheKey)) {
            throw new HttpException('429 Too Many Requests', 429);
        }
        try {
            $this->collectDataInternal();
        } catch (HttpException $e) {
            if ($e->getCode() === 429) {
                // Remember the rate limit so subsequent requests fail fast.
                $this->cache->set($cacheKey, true, 60 * 16);
            }
            throw $e;
        }
    }

    /**
     * Dispatches to the collector matching the supplied input parameters.
     */
    private function collectDataInternal()
    {
        $username = $this->getInput('u');
        $channel = $this->getInput('c');
        $custom = $this->getInput('custom');
        $playlist = $this->getInput('p');
        $search = $this->getInput('s');

        if ($username || $channel || $custom) {
            $this->collectChannelVideos($username, $channel, $custom);
        } elseif ($playlist) {
            $this->collectPlaylistVideos($playlist);
        } elseif ($search) {
            $this->collectSearchResults($search);
        } else {
            returnClientError("You must either specify either:\n - YouTube username (?u=...)\n - Channel id (?c=...)\n - Playlist id (?p=...)\n - Search (?s=...)");
        }
    }

    /**
     * Collects the videos listed on a user, channel or custom-name "videos" page.
     * Precedence is username, then channel id, then custom name.
     */
    private function collectChannelVideos($username, $channel, $custom)
    {
        if ($username) {
            $listingUrl = self::URI . '/user/' . urlencode($username) . '/videos';
        } elseif ($channel) {
            $listingUrl = self::URI . '/channel/' . urlencode($channel) . '/videos';
        } else {
            $listingUrl = self::URI . '/' . urlencode($custom) . '/videos';
        }
        $this->feeduri = $listingUrl;

        $html = $this->fetch($listingUrl);
        $jsonData = $this->extractJsonFromHtml($html);
        if (!isset($jsonData->contents)) {
            returnServerError('Unable to get data from YouTube');
        }

        if ($custom) {
            // Only the custom-name context exposes the channel avatar as feed icon.
            $this->feedIconUrl = $jsonData->metadata->channelMetadataRenderer->avatar->thumbnails[0]->url ?? '';
        }

        // The second tab of the channel page holds the video grid.
        $videos = $jsonData->contents->twoColumnBrowseResultsRenderer->tabs[1]
            ->tabRenderer->content->richGridRenderer->contents ?? null;
        if (!is_array($videos)) {
            returnServerError('Unable to get data from YouTube');
        }

        $this->fetchItemsFromFromJsonData($videos);
        $this->feedName = str_replace(' - YouTube', '', $html->find('title', 0)->plaintext);
    }

    /**
     * Collects the videos of a playlist, newest first.
     *
     * Small playlists without a duration filter are read from the XML feed; everything
     * else is read from the playlist page.
     */
    private function collectPlaylistVideos($playlist)
    {
        $feedUrl = self::URI . '/feeds/videos.xml?playlist_id=' . urlencode($playlist);
        $listingUrl = self::URI . '/playlist?list=' . urlencode($playlist);

        $html = $this->fetch($listingUrl);
        $jsonData = $this->extractJsonFromHtml($html);

        // TODO: this method returns only first 100 video items
        // if it has more videos, playlistVideoListRenderer will have continuationItemRenderer as last element
        $videos = $jsonData->contents->twoColumnBrowseResultsRenderer->tabs[0]
            ->tabRenderer->content->sectionListRenderer->contents[0]
            ->itemSectionRenderer->contents[0]->playlistVideoListRenderer->contents ?? null;
        if (!is_array($videos)) {
            returnServerError('Unable to get data from YouTube');
        }

        $filterByDuration = $this->getInput('duration_min') || $this->getInput('duration_max');
        if (count($videos) > 15 || $filterByDuration) {
            $this->fetchItemsFromFromJsonData($videos);
        } else {
            $xml = $this->fetch($feedUrl);
            $this->extractItemsFromXmlFeed($xml);
        }

        $this->feedName = 'Playlist: ' . str_replace(' - YouTube', '', $html->find('title', 0)->plaintext);

        // Normalise each timestamp on its own so mixed int/string/null values still sort.
        $toInt = static function ($timestamp) {
            if (is_int($timestamp)) {
                return $timestamp;
            }
            return is_string($timestamp) ? (int) strtotime($timestamp) : 0;
        };
        usort($this->items, function ($item1, $item2) use ($toInt) {
            return $toInt($item2['timestamp']) <=> $toInt($item1['timestamp']);
        });
    }

    /**
     * Collects the videos of a search result page.
     */
    private function collectSearchResults($search)
    {
        $listingUrl = self::URI . '/results?search_query=' . urlencode($search) . '&sp=CAI%253D';
        $html = $this->fetch($listingUrl);
        $jsonData = $this->extractJsonFromHtml($html);

        $sections = $jsonData->contents->twoColumnSearchResultsRenderer
            ->primaryContents->sectionListRenderer->contents ?? [];
        $videos = $sections;
        foreach ($sections as $section) {
            // Search result includes some ads, have to filter them
            if (isset($section->itemSectionRenderer->contents[0]->videoRenderer)) {
                $videos = $section->itemSectionRenderer->contents;
                break;
            }
        }

        $this->fetchItemsFromFromJsonData($videos);
        $this->feeduri = $listingUrl;
        $this->feedName = 'Search: ' . $search;
    }

    /**
     * Loads the watch page of a video and overwrites the by-reference arguments with the
     * author, description (links turned into anchors) and publication timestamp found there.
     * Arguments are left untouched for values that cannot be determined.
     *
     * @param string $videoId
     * @param string|null $author
     * @param string|null $description
     * @param int|null $timestamp
     */
    private function fetchVideoDetails($videoId, &$author, &$description, &$timestamp)
    {
        $url = self::URI . "/watch?v=$videoId";
        $html = $this->fetch($url, true);

        // Skip unavailable videos
        if (strpos($html->innertext, 'IS_UNAVAILABLE_PAGE') !== false) {
            return;
        }

        $elAuthor = $html->find('span[itemprop=author] > link[itemprop=name]', 0);
        if (!is_null($elAuthor)) {
            $author = $elAuthor->getAttribute('content');
        }

        $elDatePublished = $html->find('meta[itemprop=datePublished]', 0);
        if (!is_null($elDatePublished)) {
            $timestamp = strtotime($elDatePublished->getAttribute('content'));
        }

        $jsonData = $this->extractJsonFromHtml($html);
        if (!isset($jsonData->contents)) {
            return;
        }

        $jsonData = $jsonData->contents->twoColumnWatchNextResults->results->results->contents ?? null;
        if (!$jsonData) {
            throw new \Exception('Unable to find json data');
        }
        $videoSecondaryInfo = null;
        foreach ($jsonData as $item) {
            if (isset($item->videoSecondaryInfoRenderer)) {
                $videoSecondaryInfo = $item->videoSecondaryInfoRenderer;
                break;
            }
        }
        if (!$videoSecondaryInfo) {
            returnServerError('Could not find videoSecondaryInfoRenderer. Error at: ' . $videoId);
        }

        $description = $videoSecondaryInfo->attributedDescription->content ?? '';

        // Default whitespace chars used by trim + non-breaking spaces (https://en.wikipedia.org/wiki/Non-breaking_space)
        $whitespaceChars = " \t\n\r\0\x0B\u{A0}\u{2060}\u{202F}\u{2007}";
        $descEnhancements = $this->ytBridgeGetVideoDescriptionEnhancements($videoSecondaryInfo, $description, self::URI, $whitespaceChars);
        // Enhancements arrive sorted by descending position, so earlier offsets stay valid
        // while the description grows.
        foreach ($descEnhancements as $descEnhancement) {
            if (isset($descEnhancement['url'])) {
                $descBefore = mb_substr($description, 0, $descEnhancement['pos']);
                $descValue = mb_substr($description, $descEnhancement['pos'], $descEnhancement['len']);
                $descAfter = mb_substr($description, $descEnhancement['pos'] + $descEnhancement['len'], null);

                // Extended trim for the display value of internal links, e.g.:
                // FAVICON • Video Name
                // FAVICON / @ChannelName
                $descValue = trim($descValue, $whitespaceChars . '•/');

                $description = sprintf('%s<a href="%s" target="_blank">%s</a>%s', $descBefore, $descEnhancement['url'], $descValue, $descAfter);
            }
        }
    }

    /**
     * Maps the link annotations ("command runs") of a video description onto character
     * positions of the plain-text description.
     *
     * @return array list of ['pos' => int, 'len' => int, 'url' => string], sorted by
     *               descending position; empty when any link could not be located
     */
    private function ytBridgeGetVideoDescriptionEnhancements(
        object $videoSecondaryInfo,
        string $descriptionContent,
        string $baseUrl,
        string $whitespaceChars
    ): array {
        $commandRuns = $videoSecondaryInfo->attributedDescription->commandRuns ?? [];
        if (count($commandRuns) <= 0) {
            return [];
        }

        $enhancements = [];

        $boundaryWhitespaceChars = mb_str_split($whitespaceChars);
        $boundaryStartChars = array_merge($boundaryWhitespaceChars, [':', '-', '(']);
        $boundaryEndChars = array_merge($boundaryWhitespaceChars, [',', '.', "'", ')']);
        $hashtagBoundaryEndChars = array_merge($boundaryEndChars, ['#', '-']);

        $descriptionContentLength = mb_strlen($descriptionContent);

        $minPositionOffset = 0;

        // Walk the runs back to front to learn how much text must still fit after each run;
        // this yields the right-most position each run may start at.
        $prevStartPosition = 0;
        $totalLength = 0;
        $maxPositionByStartIndex = [];
        foreach (array_reverse($commandRuns) as $commandRun) {
            $endPosition = $commandRun->startIndex + $commandRun->length;
            if ($endPosition < $prevStartPosition) {
                $totalLength += 1;
            }
            $totalLength += $commandRun->length;
            $maxPositionByStartIndex[$commandRun->startIndex] = $totalLength;
            $prevStartPosition = $commandRun->startIndex;
        }

        foreach ($commandRuns as $commandRun) {
            $commandMetadata = $commandRun->onTap->innertubeCommand->commandMetadata->webCommandMetadata ?? null;
            if (!isset($commandMetadata)) {
                continue;
            }

            $enhancement = null;

            /*
            $commandRun->startIndex can be offset by few positions in the positive direction
            when some multibyte characters (e.g. emojis, but maybe also others) are used in the plain text video description.
            (probably some difference between php and javascript in handling multibyte characters)
            This loop should correct the position in most cases. It searches for the next word (determined by a set of boundary chars) with the expected length.
            Several safeguards ensure that the correct word is chosen. When a link can not be matched,
            everything will be discarded to prevent corrupting the description.
            Hashtags require a different set of boundary chars.
            */
            $isHashtag = ($commandMetadata->webPageType ?? null) === 'WEB_PAGE_TYPE_BROWSE';
            $prevEnhancement = end($enhancements);
            $minPosition = $prevEnhancement === false ? 0 : $prevEnhancement['pos'] + $prevEnhancement['len'];
            $maxPosition = $descriptionContentLength - $maxPositionByStartIndex[$commandRun->startIndex];
            $position = min($commandRun->startIndex - $minPositionOffset, $maxPosition);
            while ($position >= $minPosition) {
                // The link display value can only ever include a new line at the end (which will be removed further below), never in between.
                $newLinePosition = mb_strpos($descriptionContent, "\n", $position);
                if ($newLinePosition !== false && $newLinePosition < $position + ($commandRun->length - 1)) {
                    $position = $newLinePosition - ($commandRun->length - 1);
                    continue;
                }

                $firstChar = mb_substr($descriptionContent, $position, 1);
                $boundaryStart = mb_substr($descriptionContent, $position - 1, 1);
                $boundaryEndIndex = $position + $commandRun->length;
                $boundaryEnd = mb_substr($descriptionContent, $boundaryEndIndex, 1);

                $boundaryStartIsValid = $position === 0 ||
                    in_array($boundaryStart, $boundaryStartChars) ||
                    ($isHashtag && $firstChar === '#');
                $boundaryEndIsValid = $boundaryEndIndex === $descriptionContentLength ||
                    in_array($boundaryEnd, $isHashtag ? $hashtagBoundaryEndChars : $boundaryEndChars);

                if ($boundaryStartIsValid && $boundaryEndIsValid) {
                    $minPositionOffset = $commandRun->startIndex - $position;
                    $enhancement = [
                        'pos' => $position,
                        'len' => $commandRun->length,
                    ];
                    break;
                }

                $position--;
            }

            if (!isset($enhancement)) {
                // mb_substr keeps the excerpt from ending in the middle of a multibyte character.
                $this->logger->debug(sprintf('Position %d cannot be corrected in "%s"', $commandRun->startIndex, mb_substr($descriptionContent, 0, 50) . '...'));
                // Skip to prevent the description from becoming corrupted
                continue;
            }

            // $commandRun->length sometimes incorrectly includes the newline as last char
            $lastChar = mb_substr($descriptionContent, $enhancement['pos'] + $enhancement['len'] - 1, 1);
            if ($lastChar === "\n") {
                $enhancement['len'] -= 1;
            }

            // parse_url() omits keys for missing components (and returns false for malformed
            // URLs), so every component is read defensively.
            $commandUrl = parse_url($commandMetadata->url);
            if (!is_array($commandUrl)) {
                $commandUrl = [];
            }
            $commandUrlQuery = [];
            parse_str($commandUrl['query'] ?? '', $commandUrlQuery);
            if (($commandUrl['path'] ?? '') === '/redirect' && isset($commandUrlQuery['q'])) {
                // External links are wrapped in a YouTube redirect; unwrap the real target.
                $enhancement['url'] = urldecode($commandUrlQuery['q']);
            } elseif (isset($commandUrl['host'])) {
                $enhancement['url'] = $commandMetadata->url;
            } else {
                $enhancement['url'] = $baseUrl . $commandMetadata->url;
            }

            $enhancements[] = $enhancement;
        }

        if (count($enhancements) !== count($commandRuns)) {
            // At least one link can not be matched. Discard everything to prevent corrupting the description.
            return [];
        }

        // Sort by position in descending order to be able to safely replace values
        return array_reverse($enhancements);
    }

    /**
     * Adds one item per entry of a YouTube XML feed and sets the feed name from its title.
     * The XML feed carries no duration, so these items get no duration suffix.
     */
    private function extractItemsFromXmlFeed($xml)
    {
        $this->feedName = $this->decodeTitle($xml->find('feed > title', 0)->plaintext);

        foreach ($xml->find('entry') as $element) {
            $videoId = str_replace('yt:video:', '', $element->find('id', 0)->plaintext);
            if (strpos($videoId, 'googleads') !== false) {
                continue;
            }
            $title = $this->decodeTitle($element->find('title', 0)->plaintext);
            $author = $element->find('name', 0)->plaintext;
            $desc = $element->find('media:description', 0)->innertext;
            $desc = htmlspecialchars($desc);
            // No nl2br() here: addItem() already converts newlines, and doing it twice
            // produced doubled <br /> tags.
            $desc = preg_replace(self::URI_REGEX, '<a href="$1" target="_blank">$1</a> ', $desc);
            $time = strtotime($element->find('published', 0)->plaintext);
            $this->addItem($videoId, $title, $author, $desc, $time);
        }
    }

    /**
     * Fetches a URL as a simple_html_dom document with an English Accept-Language header.
     *
     * @param string $url
     * @param bool $cache when true the response is cached for three days
     */
    private function fetch($url, bool $cache = false)
    {
        $header = ['Accept-Language: en-US'];
        $ttl = 86400 * 3; // 3d
        $stripNewlines = false;
        if ($cache) {
            return getSimpleHTMLDOMCached($url, $ttl, $header, [], true, true, DEFAULT_TARGET_CHARSET, $stripNewlines);
        }
        return getSimpleHTMLDOM($url, $header, [], true, true, DEFAULT_TARGET_CHARSET, $stripNewlines);
    }

    /**
     * Extracts and decodes the `ytInitialData` JSON blob embedded in a YouTube page.
     *
     * @return object|null decoded data, or null when the blob is absent
     */
    private function extractJsonFromHtml($html)
    {
        $scriptRegex = '/var ytInitialData = (.*?);<\/script>/';
        $result = preg_match($scriptRegex, $html, $matches);
        if (! $result) {
            $this->logger->debug('Could not find ytInitialData');
            return null;
        }
        $data = json_decode($matches[1]);
        return $data;
    }

    /**
     * Turns renderer entries of a YouTube listing into feed items, applying the optional
     * duration filter and stopping after 99 items.
     *
     * @param iterable $jsonData list of renderer wrapper objects
     */
    private function fetchItemsFromFromJsonData($jsonData)
    {
        $minimumDurationSeconds = ($this->getInput('duration_min') ?: -1) * 60;
        $maximumDurationSeconds = ($this->getInput('duration_max') ?: INF) * 60;

        foreach ($jsonData as $item) {
            $wrapper = $item->gridVideoRenderer
                ?? $item->videoRenderer
                ?? $item->playlistVideoRenderer
                ?? $item->richItemRenderer->content->videoRenderer
                ?? null;
            // Entries that are not videos (ads, continuation markers, ...) are skipped.
            if (!isset($wrapper->videoId)) {
                continue;
            }

            // 01:03:30 | 15:06 | 1:24
            $lengthText = $wrapper->lengthText->simpleText ?? null;
            // Dc645M8Het8
            $videoId = $wrapper->videoId;
            // Jumbo frames - transfer more data faster!
            $title = $wrapper->title->runs[0]->text ?? $wrapper->title->accessibility->accessibilityData->label ?? null;
            $author = null;
            $description = $wrapper->descriptionSnippet->runs[0]->text ?? null;
            // 5 days ago | 1 month ago
            $publishedTimeText = $wrapper->publishedTimeText->simpleText ?? $wrapper->videoInfo->runs[2]->text ?? null;
            $timestamp = null;
            if ($publishedTimeText) {
                try {
                    $publicationDate = new \DateTimeImmutable($publishedTimeText);
                    // Hard-code hour, minute and second
                    $publicationDate = $publicationDate->setTime(0, 0, 0);
                    $timestamp = $publicationDate->getTimestamp();
                } catch (\Exception $e) {
                    // Unparsable relative dates simply leave the timestamp unset;
                    // fetchVideoDetails() may still provide one.
                }
            }

            $durationText = $lengthText;
            if (!$durationText) {
                foreach ($wrapper->thumbnailOverlays ?? [] as $overlay) {
                    if (isset($overlay->thumbnailOverlayTimeStatusRenderer)) {
                        // The overlay text may be a plain string or an object carrying simpleText.
                        $overlayText = $overlay->thumbnailOverlayTimeStatusRenderer->text ?? null;
                        $durationText = is_string($overlayText) ? $overlayText : ($overlayText->simpleText ?? null);
                        break;
                    }
                }
            }
            if (is_string($durationText)) {
                if (!preg_match('/([\d]{1,2})\:([\d]{1,2})\:([\d]{2})/', $durationText)) {
                    // Bring m:ss into h:m:s form so a single sscanf() format suffices.
                    $durationText = preg_replace('/([\d]{1,2})\:([\d]{2})/', '00:$1:$2', $durationText);
                }
                sscanf($durationText, '%d:%d:%d', $hours, $minutes, $seconds);
                $duration = (int) $hours * 3600 + (int) $minutes * 60 + (int) $seconds;
                if ($duration < $minimumDurationSeconds || $duration > $maximumDurationSeconds) {
                    continue;
                }
            } else {
                $durationText = null;
            }

            // Re-fetch better details from the video's watch page
            $this->fetchVideoDetails($videoId, $author, $description, $timestamp);
            $this->addItem($videoId, $title, $author, $description, $timestamp, $durationText);
            if (count($this->items) >= 99) {
                break;
            }
        }
    }

    /**
     * Appends one feed item.
     *
     * @param string $videoId
     * @param string|null $title
     * @param string|null $author
     * @param string|null $description HTML; newlines are converted to <br />
     * @param int|false|null $timestamp
     * @param string|null $durationText appended to the title as " [h:m:s]" when known;
     *                                  optional because the XML feed path has no duration
     * @param string $thumbnail thumbnail file name without extension; defaults to "0"
     */
    private function addItem($videoId, $title, $author, $description, $timestamp, $durationText = null, $thumbnail = '')
    {
        $description = nl2br((string) $description);

        $item = [];
        // This should probably be uid?
        $item['id'] = $videoId;
        $item['title'] = (string) $title;
        if (is_string($durationText) && $durationText !== '') {
            $item['title'] .= ' [' . $durationText . ']';
        }
        $item['author'] = $author ?? '';
        $item['timestamp'] = $timestamp;
        $item['uri'] = self::URI . '/watch?v=' . $videoId;
        if (!$thumbnail) {
            // Fallback to default thumbnail if there aren't any provided.
            $thumbnail = '0';
        }
        $thumbnailUri = str_replace('/www.', '/img.', self::URI) . '/vi/' . $videoId . '/' . $thumbnail . '.jpg';
        $item['content'] = sprintf('<a href="%s"><img src="%s" /></a><br />%s', $item['uri'], $thumbnailUri, $description);
        $this->items[] = $item;
    }

    /**
     * Decodes HTML entities (numeric and named, including quotes) in a title to UTF-8.
     */
    private function decodeTitle($title)
    {
        return html_entity_decode($title, ENT_QUOTES, 'UTF-8');
    }

    /**
     * Returns the playlist URL, the listing URL recorded while collecting, or the bridge URI.
     */
    public function getURI()
    {
        if (!is_null($this->getInput('p'))) {
            // Encoded the same way as the URL that is actually fetched.
            return static::URI . '/playlist?list=' . urlencode($this->getInput('p'));
        } elseif ($this->feeduri) {
            return $this->feeduri;
        }

        return parent::getURI();
    }

    /**
     * Returns "<feed name> - YouTube" for every query context, the bridge name otherwise.
     */
    public function getName()
    {
        switch ($this->queriedContext) {
            case 'By username':
            case 'By channel id':
            case 'By custom name':
            case 'By playlist Id':
            case 'Search result':
                return htmlspecialchars_decode($this->feedName) . ' - YouTube';
            default:
                return parent::getName();
        }
    }

    /**
     * Returns the channel avatar when one was found, the default bridge icon otherwise.
     */
    public function getIcon()
    {
        if (empty($this->feedIconUrl)) {
            return parent::getIcon();
        } else {
            return $this->feedIconUrl;
        }
    }
}
|
Loading…
x
Reference in New Issue
Block a user