switch back to upstream rss-bridge

This commit is contained in:
Tyler Starr 2023-12-02 17:10:13 -08:00
parent fd760efad6
commit c73e8a5e94
6 changed files with 1 additions and 810 deletions

View File

@ -1,128 +0,0 @@
# Mostly a copy of https://github.com/NixOS/nixpkgs/blob/nixos-unstable/nixos/modules/services/web-apps/rss-bridge.nix
# Since I'm using a custom derivation of rss-bridge I must define my own services module.
{ config, lib, pkgs, ... }:
with lib;
let
cfg = config.my-services.rss-bridge;
poolName = "rss-bridge";
whitelist = pkgs.writeText "rss-bridge_whitelist.txt"
(concatStringsSep "\n" cfg.whitelist);
in
{
options = {
my-services.rss-bridge = {
enable = mkEnableOption (lib.mdDoc "rss-bridge");
user = mkOption {
type = types.str;
default = "nginx";
description = lib.mdDoc ''
User account under which both the service and the web-application run.
'';
};
# Group used as the owning group in the tmpfiles rules of this module.
group = mkOption {
  type = types.str;
  default = "nginx";
  description = lib.mdDoc ''
    Group under which the web-application runs.
  '';
};
pool = mkOption {
type = types.str;
default = poolName;
description = lib.mdDoc ''
Name of existing phpfpm pool that is used to run web-application.
If not specified a pool will be created automatically with
default values.
'';
};
dataDir = mkOption {
type = types.str;
default = "/var/lib/rss-bridge";
description = lib.mdDoc ''
Location in which cache directory will be created.
You can put `config.ini.php` in here.
'';
};
virtualHost = mkOption {
type = types.nullOr types.str;
default = "rss-bridge";
description = lib.mdDoc ''
Name of the nginx virtualhost to use and setup. If null, do not setup any virtualhost.
'';
};
whitelist = mkOption {
type = types.listOf types.str;
default = [];
example = options.literalExpression ''
[
"Facebook"
"Instagram"
"Twitter"
]
'';
description = lib.mdDoc ''
List of bridges to be whitelisted.
If the list is empty, rss-bridge will use whitelist.default.txt.
Use `[ "*" ]` to whitelist all.
'';
};
};
};
config = mkIf cfg.enable {
# Define a php-fpm pool only when `cfg.pool` still has its default name;
# any other pool name is expected to be declared by the user.
services.phpfpm.pools = mkIf (cfg.pool == poolName) {
  ${poolName} = {
    user = cfg.user;
    # Run the pool and own its socket with the configured group so that the
    # `group` option actually takes effect.
    group = cfg.group;
    # Every setting is wrapped in mkDefault so it can be overridden elsewhere.
    settings = mapAttrs (name: mkDefault) {
      "listen.owner" = cfg.user;
      "listen.group" = cfg.group;
      "listen.mode" = "0600";
      "pm" = "dynamic";
      "pm.max_children" = 75;
      "pm.start_servers" = 10;
      "pm.min_spare_servers" = 5;
      "pm.max_spare_servers" = 20;
      "pm.max_requests" = 500;
      "catch_workers_output" = 1;
    };
  };
};
# tmpfiles.d rules that prepare the contents of `cfg.dataDir`.
systemd.tmpfiles.rules = [
# Create the cache directory, owned by the configured user and group.
"d '${cfg.dataDir}/cache' 0750 ${cfg.user} ${cfg.group} - -"
# Only when a whitelist is configured: (re)create whitelist.txt as a symlink to the generated file.
(mkIf (cfg.whitelist != []) "L+ ${cfg.dataDir}/whitelist.txt - - - - ${whitelist}")
# Adjust mode and ownership of config.ini.php (tmpfiles.d type `z`).
"z '${cfg.dataDir}/config.ini.php' 0750 ${cfg.user} ${cfg.group} - -"
];
# Configure an nginx virtual host for the application unless `virtualHost` is null.
services.nginx = mkIf (cfg.virtualHost != null) {
enable = true;
virtualHosts = {
${cfg.virtualHost} = {
# Files are served straight from the rss-bridge package.
root = "${pkgs.rss-bridge}";
# Try the requested URI first, then fall back to index.php with the original query string.
locations."/" = {
tryFiles = "$uri /index.php$is_args$args";
};
# Hand index.php requests to the socket of the php-fpm pool named by `cfg.pool`.
# RSSBRIDGE_DATA passes the data directory to the PHP code as a FastCGI parameter.
locations."~ ^/index.php(/|$)" = {
extraConfig = ''
include ${config.services.nginx.package}/conf/fastcgi_params;
fastcgi_split_path_info ^(.+\.php)(/.+)$;
fastcgi_pass unix:${config.services.phpfpm.pools.${cfg.pool}.socket};
fastcgi_param SCRIPT_FILENAME $document_root$fastcgi_script_name;
fastcgi_param RSSBRIDGE_DATA ${cfg.dataDir};
'';
};
};
};
};
};
}

View File

@ -3,10 +3,6 @@ let
domain = "rssbridge.tstarr.us";
in
{
imports = [
./rss-bridge.nix
];
services.postgresql = {
enable = true;
authentication = pkgs.lib.mkOverride 10 ''
@ -32,7 +28,7 @@ in
'';
};
my-services.rss-bridge = {
services.rss-bridge = {
enable = true;
whitelist = [ "*" ];
virtualHost = "${domain}";

View File

@ -5,5 +5,4 @@ final: prev: {
sway-scratchpad = final.callPackage ../pkgs/sway-scratchpad.nix {};
advcpmv = final.callPackage ../pkgs/advcpmv.nix {};
taskopen = final.callPackage ../pkgs/taskopen.nix {};
rss-bridge = final.callPackage ../pkgs/rss-bridge {};
}

View File

@ -1,31 +0,0 @@
# Packages RSS-Bridge from a pinned upstream commit with two local patches applied.
{ stdenv, lib, fetchFromGitHub }:
stdenv.mkDerivation rec {
  pname = "rss-bridge";
  # The pinned commit hash doubles as the fetched revision (`rev = version`).
  version = "b037d1b4d1f0b0f422e21125ddef00a58e185ed1";
  src = fetchFromGitHub {
    owner = "RSS-Bridge";
    repo = "rss-bridge";
    rev = version;
    sha256 = "sha256-zyWnjSYE2NFK/OJLnsFsE5oEyf+yrJe8TT6MH4roBwU=";
  };
  patches = [
    ./paths.patch
    ./youtube_bridge.patch
  ];
  # Copy the patched source tree into the output. The pre/post hooks are run
  # explicitly because overriding installPhase otherwise skips them.
  installPhase = ''
    runHook preInstall
    mkdir -p "$out"
    cp -R ./* "$out"
    runHook postInstall
  '';
  meta = with lib; {
    description = "The RSS feed for websites missing it";
    homepage = "https://github.com/RSS-Bridge/rss-bridge";
    license = licenses.unlicense;
    maintainers = with maintainers; [ starr-dusT ];
    platforms = platforms.all;
  };
}

View File

@ -1,78 +0,0 @@
diff --git a/index.php b/index.php
index 123f6ecd..69071aa2 100644
--- a/index.php
+++ b/index.php
@@ -8,8 +8,8 @@ require_once __DIR__ . '/lib/bootstrap.php';
Configuration::verifyInstallation();
$customConfig = [];
-if (file_exists(__DIR__ . '/config.ini.php')) {
- $customConfig = parse_ini_file(__DIR__ . '/config.ini.php', true, INI_SCANNER_TYPED);
+if (file_exists(getenv('RSSBRIDGE_DATA') . '/config.ini.php')) {
+ $customConfig = parse_ini_file(getenv('RSSBRIDGE_DATA') . '/config.ini.php', true, INI_SCANNER_TYPED);
}
Configuration::loadConfiguration($customConfig, getenv());
diff --git a/lib/BridgeFactory.php b/lib/BridgeFactory.php
index ad433287..195c7af1 100644
--- a/lib/BridgeFactory.php
+++ b/lib/BridgeFactory.php
@@ -13,12 +13,18 @@ final class BridgeFactory
$this->cache = RssBridge::getCache();
$this->logger = RssBridge::getLogger();
- // Create all possible bridge class names from fs
+ // Create all possible bridge class names from original fs
foreach (scandir(__DIR__ . '/../bridges/') as $file) {
if (preg_match('/^([^.]+Bridge)\.php$/U', $file, $m)) {
$this->bridgeClassNames[] = $m[1];
}
}
+ // Create all possible bridge class names from additional fs
+ foreach (scandir(PATH_BRIDGES) as $file) {
+ if (preg_match('/^([^.]+Bridge)\.php$/U', $file, $m)) {
+ $this->bridgeClassNames[] = $m[1];
+ }
+ }
$enabledBridges = Configuration::getConfig('system', 'enabled_bridges');
if ($enabledBridges === null) {
diff --git a/lib/Configuration.php b/lib/Configuration.php
index c6fed0fd..672a5699 100644
--- a/lib/Configuration.php
+++ b/lib/Configuration.php
@@ -92,8 +92,8 @@ final class Configuration
}
}
- if (file_exists(__DIR__ . '/../whitelist.txt')) {
- $enabledBridges = trim(file_get_contents(__DIR__ . '/../whitelist.txt'));
+ if (file_exists(getenv('RSSBRIDGE_DATA') . '/whitelist.txt')) {
+ $enabledBridges = trim(file_get_contents(getenv('RSSBRIDGE_DATA') . '/whitelist.txt'));
if ($enabledBridges === '*') {
self::setConfig('system', 'enabled_bridges', ['*']);
} else {
diff --git a/lib/bootstrap.php b/lib/bootstrap.php
index a95de9dd..e8ed317f 100644
--- a/lib/bootstrap.php
+++ b/lib/bootstrap.php
@@ -7,7 +7,10 @@ const PATH_LIB_FORMATS = __DIR__ . '/../formats/';
const PATH_LIB_CACHES = __DIR__ . '/../caches/';
/** Path to the cache folder */
-const PATH_CACHE = __DIR__ . '/../cache/';
+define('PATH_CACHE', getenv('RSSBRIDGE_DATA') . '/cache/');
+
+/** Path to extra bridge files */
+define('PATH_BRIDGES', getenv('RSSBRIDGE_DATA') . '/bridges/');
/** URL to the RSS-Bridge repository */
const REPOSITORY = 'https://github.com/RSS-Bridge/rss-bridge/';
@@ -41,6 +44,7 @@ spl_autoload_register(function ($className) {
__DIR__ . '/../caches/',
__DIR__ . '/../formats/',
__DIR__ . '/../lib/',
+ PATH_BRIDGES,
];
foreach ($folders as $folder) {
$file = $folder . $className . '.php';

View File

@ -1,567 +0,0 @@
diff --git a/bridges/CustomYoutubeBridge.php b/bridges/CustomYoutubeBridge.php
new file mode 100644
index 00000000..d04c6ac0
--- /dev/null
+++ b/bridges/CustomYoutubeBridge.php
@@ -0,0 +1,561 @@
+<?php
+
+/**
+* Custom RssBridgeYoutube adapted from https://github.com/RSS-Bridge/rss-bridge/blob/master/bridges/YoutubeBridge.php
+* Returns the newest videos
+* WARNING: to parse big playlists (over ~90 videos), you need to edit simple_html_dom.php:
+* change: define('MAX_FILE_SIZE', 600000);
+* into: define('MAX_FILE_SIZE', 900000); (or more)
+*/
+class CustomYoutubeBridge extends BridgeAbstract
+{
+ const NAME = 'Custom YouTube Bridge';
+ const URI = 'https://www.youtube.com';
+ const CACHE_TIMEOUT = 60 * 60 * 3;
+ const DESCRIPTION = 'Returns the 10 newest videos by username/channel/playlist or search';
+
+ const PARAMETERS = [
+ 'By username' => [
+ 'u' => [
+ 'name' => 'username',
+ 'exampleValue' => 'LinusTechTips',
+ 'required' => true
+ ]
+ ],
+ 'By channel id' => [
+ 'c' => [
+ 'name' => 'channel id',
+ 'exampleValue' => 'UCw38-8_Ibv_L6hlKChHO9dQ',
+ 'required' => true
+ ]
+ ],
+ 'By custom name' => [
+ 'custom' => [
+ 'name' => 'custom name',
+ 'exampleValue' => 'LinusTechTips',
+ 'required' => true
+ ]
+ ],
+ 'By playlist Id' => [
+ 'p' => [
+ 'name' => 'playlist id',
+ 'exampleValue' => 'PL8mG-RkN2uTzJc8N0EoyhdC54prvBBLpj',
+ 'required' => true
+ ]
+ ],
+ 'Search result' => [
+ 's' => [
+ 'name' => 'search keyword',
+ 'exampleValue' => 'LinusTechTips',
+ 'required' => true
+ ],
+ 'pa' => [
+ 'name' => 'page',
+ 'type' => 'number',
+ 'title' => 'This option is not work anymore, as YouTube will always return the same page',
+ 'exampleValue' => 1
+ ]
+ ],
+ 'global' => [
+ 'duration_min' => [
+ 'name' => 'min. duration (minutes)',
+ 'type' => 'number',
+ 'title' => 'Minimum duration for the video in minutes',
+ 'exampleValue' => 5
+ ],
+ 'duration_max' => [
+ 'name' => 'max. duration (minutes)',
+ 'type' => 'number',
+ 'title' => 'Maximum duration for the video in minutes',
+ 'exampleValue' => 10
+ ]
+ ]
+ ];
+
+ private $feedName = '';
+ private $feeduri = '';
+ private $feedIconUrl = '';
+    // URL matcher taken from the BetterVideoRss repository by VerifiedJoseph.
+    // The optional port group must be written as {2,4}: with a space inside the braces
+    // PCRE may read the braces as literal text, so URLs with a port are cut off at the colon.
+    const URI_REGEX = '/(https?:\/\/(?:www\.)?(?:[a-zA-Z0-9-.]{2,256}\.[a-z]{2,20})(\:[0-9]{2,4})?(?:\/[a-zA-Z0-9@:%_\+.,~#"\'!?&\/\/=\-*]+|\/)?)/ims'; //phpcs:ignore
+
+    /**
+     * Entry point for feed collection, guarded by a cached rate-limit flag.
+     *
+     * While the flag is present in the cache a 429 is thrown without contacting YouTube.
+     * When collecting fails with HTTP 429 the flag is stored for 16 minutes and the
+     * exception is re-thrown.
+     */
+    public function collectData()
+    {
+        $rateLimitKey = 'youtube_rate_limit';
+        $rateLimited = $this->cache->get($rateLimitKey);
+        if ($rateLimited) {
+            throw new HttpException('429 Too Many Requests', 429);
+        }
+
+        try {
+            $this->collectDataInternal();
+        } catch (HttpException $exception) {
+            $tooManyRequests = $exception->getCode() === 429;
+            if ($tooManyRequests) {
+                $this->cache->set($rateLimitKey, true, 60 * 16);
+            }
+            throw $exception;
+        }
+    }
+
+ /**
+ * Builds the feed for whichever input was supplied: username, channel id, custom name, playlist id or search keyword.
+ *
+ * - username / channel id / custom name: scrapes the channel's /videos listing page.
+ * - playlist id: scrapes the playlist page, or reads the XML feed when the playlist has at most 15 entries and no duration filter is set.
+ * - search keyword: scrapes the results page and uses the first section that contains video results.
+ *
+ * Fills $this->items and $this->feedName; $this->feeduri is set for channel-type and search feeds.
+ * Reports a client error when none of the inputs is given.
+ */
+ private function collectDataInternal()
+ {
+ $html = '';
+ $url_feed = '';
+ $url_listing = '';
+
+ $username = $this->getInput('u');
+ $channel = $this->getInput('c');
+ $custom = $this->getInput('custom');
+ $playlist = $this->getInput('p');
+ $search = $this->getInput('s');
+
+ $durationMin = $this->getInput('duration_min');
+ $durationMax = $this->getInput('duration_max');
+
+ // Whether to discriminate videos by duration
+ $filterByDuration = $durationMin || $durationMax;
+
+ if ($username) {
+ // user and channel
+ $url_feed = self::URI . '/feeds/videos.xml?user=' . urlencode($username);
+ $url_listing = self::URI . '/user/' . urlencode($username) . '/videos';
+ } elseif ($channel) {
+ $url_feed = self::URI . '/feeds/videos.xml?channel_id=' . urlencode($channel);
+ $url_listing = self::URI . '/channel/' . urlencode($channel) . '/videos';
+ } elseif ($custom) {
+ // A custom name has no known feed URL yet; it is read from the listing page below.
+ $url_listing = self::URI . '/' . urlencode($custom) . '/videos';
+ }
+
+ if ($url_feed || $url_listing) {
+ // user, channel or custom
+ $this->feeduri = $url_listing;
+ if ($custom) {
+ // Extract the feed url for the custom name
+ $html = $this->fetch($url_listing);
+ $jsonData = $this->extractJsonFromHtml($html);
+ // Pluck out the rss feed url
+ $url_feed = $jsonData->metadata->channelMetadataRenderer->rssUrl;
+ $this->feedIconUrl = $jsonData->metadata->channelMetadataRenderer->avatar->thumbnails[0]->url;
+ }
+ if (!$custom) {
+ // Fetch the html page
+ $html = $this->fetch($url_listing);
+ $jsonData = $this->extractJsonFromHtml($html);
+ }
+ // NOTE(review): $channel_id is assigned below but never read in this method.
+ $channel_id = '';
+ if (isset($jsonData->contents)) {
+ $channel_id = $jsonData->metadata->channelMetadataRenderer->externalId;
+ // NOTE(review): tabs[1] is presumably the "Videos" tab of the channel page - confirm.
+ $jsonData = $jsonData->contents->twoColumnBrowseResultsRenderer->tabs[1];
+ $jsonData = $jsonData->tabRenderer->content->richGridRenderer->contents;
+ // $jsonData = $jsonData->itemSectionRenderer->contents[0]->gridRenderer->items;
+ $this->fetchItemsFromFromJsonData($jsonData);
+ } else {
+ returnServerError('Unable to get data from YouTube');
+ }
+ // The feed name is the page title without its " - YouTube" suffix.
+ $this->feedName = str_replace(' - YouTube', '', $html->find('title', 0)->plaintext);
+ } elseif ($playlist) {
+ // playlist
+ $url_feed = self::URI . '/feeds/videos.xml?playlist_id=' . urlencode($playlist);
+ $url_listing = self::URI . '/playlist?list=' . urlencode($playlist);
+ $html = $this->fetch($url_listing);
+ $jsonData = $this->extractJsonFromHtml($html);
+ // TODO: this method returns only first 100 video items
+ // if it has more videos, playlistVideoListRenderer will have continuationItemRenderer as last element
+ $jsonData = $jsonData->contents->twoColumnBrowseResultsRenderer->tabs[0];
+ $jsonData = $jsonData->tabRenderer->content->sectionListRenderer->contents[0]->itemSectionRenderer;
+ $jsonData = $jsonData->contents[0]->playlistVideoListRenderer->contents;
+ $item_count = count($jsonData);
+
+ // NOTE(review): the threshold of 15 presumably matches the size of YouTube's XML feed - confirm.
+ if ($item_count > 15 || $filterByDuration) {
+ $this->fetchItemsFromFromJsonData($jsonData);
+ } else {
+ $xml = $this->fetch($url_feed);
+ $this->extractItemsFromXmlFeed($xml);
+ }
+ $this->feedName = 'Playlist: ' . str_replace(' - YouTube', '', $html->find('title', 0)->plaintext);
+ // Sort newest first; timestamps are run through strtotime() only when neither side is an integer.
+ usort($this->items, function ($item1, $item2) {
+ if (!is_int($item1['timestamp']) && !is_int($item2['timestamp'])) {
+ $item1['timestamp'] = strtotime($item1['timestamp']);
+ $item2['timestamp'] = strtotime($item2['timestamp']);
+ }
+ return $item2['timestamp'] - $item1['timestamp'];
+ });
+ } elseif ($search) {
+ // search
+ // NOTE(review): sp=CAI%253D looks like YouTube's "sort by upload date" filter - confirm.
+ $url_listing = self::URI . '/results?search_query=' . urlencode($search) . '&sp=CAI%253D';
+ $html = $this->fetch($url_listing);
+ $jsonData = $this->extractJsonFromHtml($html);
+ $jsonData = $jsonData->contents->twoColumnSearchResultsRenderer->primaryContents;
+ $jsonData = $jsonData->sectionListRenderer->contents;
+ foreach ($jsonData as $data) {
+ // Search result includes some ads, have to filter them
+ if (isset($data->itemSectionRenderer->contents[0]->videoRenderer)) {
+ $jsonData = $data->itemSectionRenderer->contents;
+ break;
+ }
+ }
+ $this->fetchItemsFromFromJsonData($jsonData);
+ $this->feeduri = $url_listing;
+ $this->feedName = 'Search: ' . $search;
+ } else {
+ returnClientError("You must either specify either:\n - YouTube username (?u=...)\n - Channel id (?c=...)\n - Playlist id (?p=...)\n - Search (?s=...)");
+ }
+ }
+
+ /**
+ * Loads a video's watch page (through the cached fetch) and fills in author, description and timestamp by reference.
+ *
+ * Arguments are left untouched where the page provides no value. Returns early for unavailable
+ * videos and when the page's ytInitialData has no "contents".
+ * Links listed in the description's commandRuns are rewritten as HTML anchors.
+ *
+ * @param string $videoId YouTube video id
+ * @param mixed $author receives the channel name when found
+ * @param mixed $description receives the description with links turned into anchors
+ * @param mixed $timestamp receives the strtotime() result of the published date when found
+ * @throws \Exception when the expected results list is missing from the page JSON
+ */
+ private function fetchVideoDetails($videoId, &$author, &$description, &$timestamp)
+ {
+ $url = self::URI . "/watch?v=$videoId";
+ $html = $this->fetch($url, true);
+
+ // Skip unavailable videos
+ if (strpos($html->innertext, 'IS_UNAVAILABLE_PAGE') !== false) {
+ return;
+ }
+
+ $elAuthor = $html->find('span[itemprop=author] > link[itemprop=name]', 0);
+ if (!is_null($elAuthor)) {
+ $author = $elAuthor->getAttribute('content');
+ }
+
+ $elDatePublished = $html->find('meta[itemprop=datePublished]', 0);
+ if (!is_null($elDatePublished)) {
+ $timestamp = strtotime($elDatePublished->getAttribute('content'));
+ }
+
+ $jsonData = $this->extractJsonFromHtml($html);
+ if (!isset($jsonData->contents)) {
+ return;
+ }
+
+ $jsonData = $jsonData->contents->twoColumnWatchNextResults->results->results->contents ?? null;
+ if (!$jsonData) {
+ throw new \Exception('Unable to find json data');
+ }
+ // The description lives in the first entry that carries a videoSecondaryInfoRenderer.
+ $videoSecondaryInfo = null;
+ foreach ($jsonData as $item) {
+ if (isset($item->videoSecondaryInfoRenderer)) {
+ $videoSecondaryInfo = $item->videoSecondaryInfoRenderer;
+ break;
+ }
+ }
+ if (!$videoSecondaryInfo) {
+ returnServerError('Could not find videoSecondaryInfoRenderer. Error at: ' . $videoId);
+ }
+
+ $description = $videoSecondaryInfo->attributedDescription->content ?? '';
+
+ // Default whitespace chars used by trim + non-breaking spaces (https://en.wikipedia.org/wiki/Non-breaking_space)
+ $whitespaceChars = " \t\n\r\0\x0B\u{A0}\u{2060}\u{202F}\u{2007}";
+ // The helper returns the enhancements ordered by descending position, so replacing one
+ // link does not shift the offsets of the links still to be processed.
+ $descEnhancements = $this->ytBridgeGetVideoDescriptionEnhancements($videoSecondaryInfo, $description, self::URI, $whitespaceChars);
+ foreach ($descEnhancements as $descEnhancement) {
+ if (isset($descEnhancement['url'])) {
+ $descBefore = mb_substr($description, 0, $descEnhancement['pos']);
+ $descValue = mb_substr($description, $descEnhancement['pos'], $descEnhancement['len']);
+ $descAfter = mb_substr($description, $descEnhancement['pos'] + $descEnhancement['len'], null);
+
+ // Extended trim for the display value of internal links, e.g.:
+ // FAVICON • Video Name
+ // FAVICON / @ChannelName
+ $descValue = trim($descValue, $whitespaceChars . '•/');
+
+ $description = sprintf('%s<a href="%s" target="_blank">%s</a>%s', $descBefore, $descEnhancement['url'], $descValue, $descAfter);
+ }
+ }
+ }
+
+    /**
+     * Maps the link annotations (commandRuns) of a video description onto character
+     * positions in the plain-text description.
+     *
+     * @param object $videoSecondaryInfo videoSecondaryInfoRenderer object from the watch page JSON
+     * @param string $descriptionContent plain-text description the positions refer to
+     * @param string $baseUrl prefix used for command URLs that have no host
+     * @param string $whitespaceChars characters treated as whitespace word boundaries
+     * @return array list of ['pos' => int, 'len' => int, 'url' => string] ordered by
+     *               descending position; empty when there are no command runs or when
+     *               at least one of them could not be matched
+     */
+    private function ytBridgeGetVideoDescriptionEnhancements(
+        object $videoSecondaryInfo,
+        string $descriptionContent,
+        string $baseUrl,
+        string $whitespaceChars
+    ): array {
+        $commandRuns = $videoSecondaryInfo->attributedDescription->commandRuns ?? [];
+        if (count($commandRuns) <= 0) {
+            return [];
+        }
+
+        $enhancements = [];
+
+        $boundaryWhitespaceChars = mb_str_split($whitespaceChars);
+        $boundaryStartChars = array_merge($boundaryWhitespaceChars, [':', '-', '(']);
+        $boundaryEndChars = array_merge($boundaryWhitespaceChars, [',', '.', "'", ')']);
+        $hashtagBoundaryEndChars = array_merge($boundaryEndChars, ['#', '-']);
+
+        $descriptionContentLength = mb_strlen($descriptionContent);
+
+        $minPositionOffset = 0;
+
+        // Walk the runs back to front to learn how much room each run and everything
+        // after it needs at the end of the description (plus one separator between
+        // runs that do not touch). This yields the right-most possible start per run.
+        $prevStartPosition = 0;
+        $totalLength = 0;
+        $maxPositionByStartIndex = [];
+        foreach (array_reverse($commandRuns) as $commandRun) {
+            $endPosition = $commandRun->startIndex + $commandRun->length;
+            if ($endPosition < $prevStartPosition) {
+                $totalLength += 1;
+            }
+            $totalLength += $commandRun->length;
+            $maxPositionByStartIndex[$commandRun->startIndex] = $totalLength;
+            $prevStartPosition = $commandRun->startIndex;
+        }
+
+        foreach ($commandRuns as $commandRun) {
+            $commandMetadata = $commandRun->onTap->innertubeCommand->commandMetadata->webCommandMetadata ?? null;
+            if (!isset($commandMetadata)) {
+                continue;
+            }
+
+            $enhancement = null;
+
+            /*
+            $commandRun->startIndex can be offset by few positions in the positive direction
+            when some multibyte characters (e.g. emojis, but maybe also others) are used in the plain text video description.
+            (probably some difference between php and javascript in handling multibyte characters)
+            This loop should correct the position in most cases. It searches for the next word (determined by a set of boundary chars) with the expected length.
+            Several safeguards ensure that the correct word is chosen. When a link can not be matched,
+            everything will be discarded to prevent corrupting the description.
+            Hashtags require a different set of boundary chars.
+            */
+            $isHashtag = $commandMetadata->webPageType === 'WEB_PAGE_TYPE_BROWSE';
+            $prevEnhancement = end($enhancements);
+            $minPosition = $prevEnhancement === false ? 0 : $prevEnhancement['pos'] + $prevEnhancement['len'];
+            $maxPosition = $descriptionContentLength - $maxPositionByStartIndex[$commandRun->startIndex];
+            $position = min($commandRun->startIndex - $minPositionOffset, $maxPosition);
+            while ($position >= $minPosition) {
+                // The link display value can only ever include a new line at the end (which will be removed further below), never in between.
+                $newLinePosition = mb_strpos($descriptionContent, "\n", $position);
+                if ($newLinePosition !== false && $newLinePosition < $position + ($commandRun->length - 1)) {
+                    $position = $newLinePosition - ($commandRun->length - 1);
+                    continue;
+                }
+
+                $firstChar = mb_substr($descriptionContent, $position, 1);
+                $boundaryStart = mb_substr($descriptionContent, $position - 1, 1);
+                $boundaryEndIndex = $position + $commandRun->length;
+                $boundaryEnd = mb_substr($descriptionContent, $boundaryEndIndex, 1);
+
+                $boundaryStartIsValid = $position === 0 ||
+                    in_array($boundaryStart, $boundaryStartChars) ||
+                    ($isHashtag && $firstChar === '#');
+                $boundaryEndIsValid = $boundaryEndIndex === $descriptionContentLength ||
+                    in_array($boundaryEnd, $isHashtag ? $hashtagBoundaryEndChars : $boundaryEndChars);
+
+                if ($boundaryStartIsValid && $boundaryEndIsValid) {
+                    // Remember the correction so that later runs start from an adjusted index.
+                    $minPositionOffset = $commandRun->startIndex - $position;
+                    $enhancement = [
+                        'pos' => $position,
+                        'len' => $commandRun->length,
+                    ];
+                    break;
+                }
+
+                $position--;
+            }
+
+            if (!isset($enhancement)) {
+                // mb_substr keeps the preview valid when the description contains multibyte characters.
+                $this->logger->debug(sprintf('Position %d cannot be corrected in "%s"', $commandRun->startIndex, mb_substr($descriptionContent, 0, 50) . '...'));
+                // Skip to prevent the description from becoming corrupted
+                continue;
+            }
+
+            // $commandRun->length sometimes incorrectly includes the newline as last char
+            $lastChar = mb_substr($descriptionContent, $enhancement['pos'] + $enhancement['len'] - 1, 1);
+            if ($lastChar === "\n") {
+                $enhancement['len'] -= 1;
+            }
+
+            $commandUrl = parse_url($commandMetadata->url);
+            if (($commandUrl['path'] ?? '') === '/redirect') {
+                // parse_str() already URL-decodes the values. Decoding a second time would
+                // corrupt targets that legitimately contain encoded characters (%2B, %25, +).
+                parse_str($commandUrl['query'] ?? '', $commandUrlQuery);
+                $enhancement['url'] = $commandUrlQuery['q'] ?? '';
+            } elseif (isset($commandUrl['host'])) {
+                $enhancement['url'] = $commandMetadata->url;
+            } else {
+                $enhancement['url'] = $baseUrl . $commandMetadata->url;
+            }
+
+            $enhancements[] = $enhancement;
+        }
+
+        if (count($enhancements) !== count($commandRuns)) {
+            // At least one link can not be matched. Discard everything to prevent corrupting the description.
+            return [];
+        }
+
+        // Sort by position in descending order to be able to safely replace values
+        return array_reverse($enhancements);
+    }
+
+    /**
+     * Builds feed items from a parsed XML feed document.
+     *
+     * Sets the feed name from the feed title and adds one item per <entry>,
+     * skipping entries whose id contains "googleads".
+     *
+     * @param object $xml parsed feed document as returned by fetch()
+     */
+    private function extractItemsFromXmlFeed($xml)
+    {
+        $this->feedName = $this->decodeTitle($xml->find('feed > title', 0)->plaintext);
+
+        foreach ($xml->find('entry') as $element) {
+            $videoId = str_replace('yt:video:', '', $element->find('id', 0)->plaintext);
+            if (strpos($videoId, 'googleads') !== false) {
+                continue;
+            }
+            $title = $this->decodeTitle($element->find('title', 0)->plaintext);
+            $author = $element->find('name', 0)->plaintext;
+            $desc = $element->find('media:description', 0)->innertext;
+            $desc = htmlspecialchars($desc);
+            // No nl2br() here: addItem() converts newlines itself, and applying it
+            // twice would double every line break.
+            $desc = preg_replace(self::URI_REGEX, '<a href="$1" target="_blank">$1</a> ', $desc);
+            $time = strtotime($element->find('published', 0)->plaintext);
+            // The XML feed carries no video length, so pass an explicit null duration.
+            $this->addItem($videoId, $title, $author, $desc, $time, null);
+        }
+    }
+
+    /**
+     * Downloads a page as a simple_html_dom document, requesting English content.
+     *
+     * @param string $url page to load
+     * @param bool $cache when true the cached loader is used with a three day TTL
+     */
+    private function fetch($url, bool $cache = false)
+    {
+        $header = ['Accept-Language: en-US'];
+        $stripNewlines = false;
+
+        if (!$cache) {
+            return getSimpleHTMLDOM($url, $header, [], true, true, DEFAULT_TARGET_CHARSET, $stripNewlines);
+        }
+
+        $threeDays = 86400 * 3;
+        return getSimpleHTMLDOMCached($url, $threeDays, $header, [], true, true, DEFAULT_TARGET_CHARSET, $stripNewlines);
+    }
+
+    /**
+     * Extracts and decodes the "ytInitialData" JSON assigned inside a script tag of the page.
+     *
+     * @param mixed $html page markup (anything preg_match() accepts as a subject)
+     * @return mixed decoded JSON, or null when the assignment is not found
+     */
+    private function extractJsonFromHtml($html)
+    {
+        $pattern = '/var ytInitialData = (.*?);<\/script>/';
+        if (!preg_match($pattern, $html, $captures)) {
+            $this->logger->debug('Could not find ytInitialData');
+            return null;
+        }
+        return json_decode($captures[1]);
+    }
+
+ /**
+ * Converts a list of YouTube renderer entries into feed items.
+ *
+ * Entries without one of the known video renderers are skipped, as are videos whose duration
+ * lies outside the duration_min / duration_max inputs. For every remaining video the watch page
+ * is loaded to obtain author, description and publish date. Stops once 99 items are collected.
+ *
+ * @param iterable $jsonData renderer entries taken from the page's ytInitialData
+ */
+ private function fetchItemsFromFromJsonData($jsonData)
+ {
+ // Unset limits become -1 minute and INF, so they never exclude a video.
+ $minimumDurationSeconds = ($this->getInput('duration_min') ?: -1) * 60;
+ $maximumDurationSeconds = ($this->getInput('duration_max') ?: INF) * 60;
+
+ foreach ($jsonData as $item) {
+ // Pick whichever renderer variant this entry uses; ignore anything else.
+ $wrapper = null;
+ if (isset($item->gridVideoRenderer)) {
+ $wrapper = $item->gridVideoRenderer;
+ } elseif (isset($item->videoRenderer)) {
+ $wrapper = $item->videoRenderer;
+ } elseif (isset($item->playlistVideoRenderer)) {
+ $wrapper = $item->playlistVideoRenderer;
+ } elseif (isset($item->richItemRenderer)) {
+ $wrapper = $item->richItemRenderer->content->videoRenderer;
+ } else {
+ continue;
+ }
+
+ // 01:03:30 | 15:06 | 1:24
+ $lengthText = $wrapper->lengthText->simpleText ?? null;
+ // 6,875 views
+ $viewCount = $wrapper->viewCountText->simpleText ?? null;
+ // Dc645M8Het8
+ $videoId = $wrapper->videoId;
+ // Jumbo frames - transfer more data faster!
+ $title = $wrapper->title->runs[0]->text ?? $wrapper->title->accessibility->accessibilityData->label ?? null;
+ $author = null;
+ $description = $wrapper->descriptionSnippet->runs[0]->text ?? null;
+ // 5 days ago | 1 month ago
+ $publishedTimeText = $wrapper->publishedTimeText->simpleText ?? $wrapper->videoInfo->runs[2]->text ?? null;
+ $timestamp = null;
+ if ($publishedTimeText) {
+ try {
+ $publicationDate = new \DateTimeImmutable($publishedTimeText);
+ // Hard-code hour, minute and second
+ $publicationDate = $publicationDate->setTime(0, 0, 0);
+ $timestamp = $publicationDate->getTimestamp();
+ } catch (\Exception $e) {
+ // Text that cannot be parsed as a date: $timestamp stays null.
+ }
+ }
+
+ // Integer 0 means "no duration found"; the is_string() check below then skips the duration filter.
+ $durationText = 0;
+ if ($lengthText) {
+ $durationText = $lengthText;
+ } else {
+ foreach ($wrapper->thumbnailOverlays as $overlay) {
+ if (isset($overlay->thumbnailOverlayTimeStatusRenderer)) {
+ $durationText = $overlay->thumbnailOverlayTimeStatusRenderer->text;
+ break;
+ }
+ }
+ }
+ if (is_string($durationText)) {
+ // Bring the text into H:M:S form (M:S gets a "00:" prefix) before converting to seconds.
+ if (preg_match('/([\d]{1,2})\:([\d]{1,2})\:([\d]{2})/', $durationText)) {
+ $durationText = preg_replace('/([\d]{1,2})\:([\d]{1,2})\:([\d]{2})/', '$1:$2:$3', $durationText);
+ } else {
+ $durationText = preg_replace('/([\d]{1,2})\:([\d]{2})/', '00:$1:$2', $durationText);
+ }
+ sscanf($durationText, '%d:%d:%d', $hours, $minutes, $seconds);
+ $duration = $hours * 3600 + $minutes * 60 + $seconds;
+ if ($duration < $minimumDurationSeconds || $duration > $maximumDurationSeconds) {
+ continue;
+ }
+ }
+ // Re-fetch better details from the video's watch page
+ $this->fetchVideoDetails($videoId, $author, $description, $timestamp);
+ $this->addItem($videoId, $title, $author, $description, $timestamp, $durationText);
+ if (count($this->items) >= 99) {
+ break;
+ }
+ }
+ }
+
+    /**
+     * Appends one video to $this->items.
+     *
+     * @param string $videoId YouTube video id, used for the item id, link and thumbnail
+     * @param string|null $title video title
+     * @param string|null $author channel name; stored as '' when null
+     * @param string|null $description description text; newlines are converted to <br />
+     * @param mixed $timestamp publish time, stored as given
+     * @param mixed $durationText duration shown as " [duration]" after the title; optional
+     *                            because not every source provides one (null or '' adds no suffix)
+     * @param string $thumbnail thumbnail file name without extension; defaults to "0"
+     */
+    private function addItem($videoId, $title, $author, $description, $timestamp, $durationText = null, $thumbnail = '')
+    {
+        // nl2br() expects a string; a missing description becomes an empty one.
+        $description = nl2br($description ?? '');
+
+        $item = [];
+        // This should probably be uid?
+        $item['id'] = $videoId;
+        $item['title'] = (string) $title;
+        if ($durationText !== null && $durationText !== '') {
+            $item['title'] .= ' [' . $durationText . ']';
+        }
+        $item['author'] = $author ?? '';
+        $item['timestamp'] = $timestamp;
+        $item['uri'] = self::URI . '/watch?v=' . $videoId;
+        if (!$thumbnail) {
+            // Fallback to default thumbnail if there aren't any provided.
+            $thumbnail = '0';
+        }
+        $thumbnailUri = str_replace('/www.', '/img.', self::URI) . '/vi/' . $videoId . '/' . $thumbnail . '.jpg';
+        $item['content'] = sprintf('<a href="%s"><img src="%s" /></a><br />%s', $item['uri'], $thumbnailUri, $description);
+        $this->items[] = $item;
+    }
+
+    /**
+     * Decodes HTML entities in a title, numeric (&#1234;) as well as named (&quot;), to UTF-8.
+     */
+    private function decodeTitle($title)
+    {
+        $decoded = html_entity_decode($title, ENT_QUOTES, 'UTF-8');
+        return $decoded;
+    }
+
+    /**
+     * Returns the playlist URL when a playlist id was given, otherwise the listing
+     * URL recorded during collection, otherwise the parent's URI.
+     */
+    public function getURI()
+    {
+        $playlistId = $this->getInput('p');
+        if ($playlistId !== null) {
+            return static::URI . '/playlist?list=' . $playlistId;
+        }
+
+        if ($this->feeduri) {
+            return $this->feeduri;
+        }
+
+        return parent::getURI();
+    }
+
+    /**
+     * Returns "<feed name> - YouTube" for every query context of this bridge and
+     * the parent's name otherwise.
+     */
+    public function getName()
+    {
+        $youtubeContexts = [
+            'By username',
+            'By channel id',
+            'By custom name',
+            'By playlist Id',
+            'Search result',
+        ];
+
+        // Non-strict in_array(): the context is compared loosely (==).
+        if (in_array($this->queriedContext, $youtubeContexts)) {
+            return htmlspecialchars_decode($this->feedName) . ' - YouTube';
+        }
+
+        return parent::getName();
+    }
+
+    /**
+     * Returns the channel avatar URL captured during collection, or the parent's icon when none was captured.
+     */
+    public function getIcon()
+    {
+        if (!empty($this->feedIconUrl)) {
+            return $this->feedIconUrl;
+        }
+
+        return parent::getIcon();
+    }
+}