Sign Up
Log In
Log In
or
Sign Up
Places
All Projects
Status Monitor
Collapse sidebar
openSUSE:Leap:15.6:Update
youtube-dl
30713-new-ceskatelevize.patch
Overview
Repositories
Revisions
Requests
Users
Attributes
Meta
File 30713-new-ceskatelevize.patch of Package youtube-dl
From d799c405010e50f75e253ace218de9c4be8744a9 Mon Sep 17 00:00:00 2001 From: dirkf <fieldhouse@gmx.net> Date: Tue, 1 Mar 2022 08:11:50 +0000 Subject: [PATCH] [extractor/ceskatelevize] Back-port extractor from yt-dlp * back-port extractor with traverse_obj() shim, removing CeskaTelevizePoradyIE * follow redirect URL * support liveBroadcast and videobonusDetail in __NEXT__ data * id must be compat_str * return single video for singleton playlist * fix/add tests --- youtube_dl/compat.py | 4 youtube_dl/extractor/ceskatelevize.py | 277 ++++++++++++++++++++++++---------- youtube_dl/extractor/extractors.py | 5 3 files changed, 203 insertions(+), 83 deletions(-) --- a/youtube_dl/compat.py +++ b/youtube_dl/compat.py @@ -21,6 +21,10 @@ import subprocess import sys import xml.etree.ElementTree +try: + import collections.abc as compat_collections_abc +except ImportError: + import collections as compat_collections_abc try: import urllib.request as compat_urllib_request --- a/youtube_dl/extractor/ceskatelevize.py +++ b/youtube_dl/extractor/ceskatelevize.py @@ -12,70 +12,243 @@ from ..utils import ( ExtractorError, float_or_none, sanitized_Request, - unescapeHTML, - update_url_query, + str_or_none, urlencode_postdata, USER_AGENTS, ) +try: + from ..utils import traverse_obj +except ImportError: + from ..compat import (compat_str, compat_collections_abc) + from ..utils import int_or_none + + def traverse_obj(obj, *path_list, **kw): + ''' Traverse nested list/dict/tuple''' + + # parameter defaults + default = kw.get('default') + expected_type = kw.get('expected_type') + get_all = kw.get('get_all', True) + casesense = kw.get('casesense', True) + is_user_input = kw.get('is_user_input', False) + traverse_string = kw.get('traverse_string', False) + + def variadic(x, allowed_types=(compat_str, bytes)): + return x if isinstance(x, compat_collections_abc.Iterable) and not isinstance(x, allowed_types) else (x,) + + def listish(l): + return isinstance(l, (list, tuple)) + + def from_iterable(iterables): + # chain.from_iterable(['ABC', 'DEF']) --> A B C D E F + for it in iterables: + for element in it: + yield element + + class Nonlocal: + pass + nl = Nonlocal() + + if not casesense: + _lower = lambda k: (k.lower() if isinstance(k, compat_str) else k) + path_list = (map(_lower, variadic(path)) for path in path_list) + + def _traverse_obj(obj, path, _current_depth=0): + path = tuple(variadic(path)) + for i, key in enumerate(path): + if obj is None: + return None + if listish(key): + obj = [_traverse_obj(obj, sub_key, _current_depth) for sub_key in key] + key = Ellipsis + if key is Ellipsis: + obj = (obj.values() if isinstance(obj, dict) + else obj if listish(obj) + else compat_str(obj) if traverse_string else []) + _current_depth += 1 + nl.depth = max(nl.depth, _current_depth) + return [_traverse_obj(inner_obj, path[i + 1:], _current_depth) for inner_obj in obj] + elif callable(key): + if listish(obj): + obj = enumerate(obj) + elif isinstance(obj, dict): + obj = obj.items() + else: + if not traverse_string: + return None + obj = str(obj) + _current_depth += 1 + nl.depth = max(nl.depth, _current_depth) + return [_traverse_obj(v, path[i + 1:], _current_depth) for k, v in obj if key(k)] + elif isinstance(obj, dict) and not (is_user_input and key == ':'): + obj = (obj.get(key) if casesense or (key in obj) + else next((v for k, v in obj.items() if _lower(k) == key), None)) + else: + if is_user_input: + key = (int_or_none(key) if ':' not in key + else slice(*map(int_or_none, key.split(':')))) + if key == slice(None): + return _traverse_obj(obj, tuple([Ellipsis] + list(path[i + 1:])), _current_depth) + if not isinstance(key, (int, slice)): + return None + if not listish(obj): + if not traverse_string: + return None + obj = compat_str(obj) + try: + obj = obj[key] + except IndexError: + return None + return obj + + if isinstance(expected_type, type): + type_test = lambda val: val if isinstance(val, expected_type) else None + elif expected_type is not None: + type_test = expected_type + else: + type_test = lambda val: val + + for path in path_list: + nl.depth = 0 + val = _traverse_obj(obj, path) + if val is not None: + if nl.depth: + for _ in range(nl.depth - 1): + val = from_iterable(v for v in val if v is not None) + val = [v for v in map(type_test, val) if v is not None] + if val: + return val if get_all else val[0] + else: + val = type_test(val) + if val is not None: + return val + return default + class CeskaTelevizeIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?ceskatelevize\.cz/ivysilani/(?:[^/?#&]+/)*(?P<id>[^/#?]+)' + _VALID_URL = r'https?://(?:www\.)?ceskatelevize\.cz/(?:ivysilani|porady|zive)/(?:[^/?#&]+/)*(?P<id>[^/#?]+)' _TESTS = [{ - 'url': 'http://www.ceskatelevize.cz/ivysilani/ivysilani/10441294653-hyde-park-civilizace/214411058091220', + 'url': 'http://www.ceskatelevize.cz/ivysilani/10441294653-hyde-park-civilizace/215411058090502/bonus/20641-bonus-01-en', 'info_dict': { - 'id': '61924494877246241', + 'id': '61924494877028507', 'ext': 'mp4', - 'title': 'Hyde Park Civilizace: Život v Grónsku', - 'description': 'md5:3fec8f6bb497be5cdb0c9e8781076626', + 'title': 'Bonus 01 - En - Hyde Park Civilizace', + 'description': 'English Subtittles', 'thumbnail': r're:^https?://.*\.jpg', - 'duration': 3350, + 'duration': 81.3, }, 'params': { # m3u8 download 'skip_download': True, }, }, { - 'url': 'http://www.ceskatelevize.cz/ivysilani/10441294653-hyde-park-civilizace/215411058090502/bonus/20641-bonus-01-en', + # live stream + 'url': 'http://www.ceskatelevize.cz/zive/ct1/', 'info_dict': { - 'id': '61924494877028507', + 'id': '102', 'ext': 'mp4', - 'title': 'Hyde Park Civilizace: Bonus 01 - En', - 'description': 'English Subtittles', - 'thumbnail': r're:^https?://.*\.jpg', - 'duration': 81.3, + 'title': r'ČT1 - živé vysílání online', + 'description': 'Sledujte živé vysílání kanálu ČT1 online. Vybírat si můžete i z dalších kanálů České televize na kterémkoli z vašich zařízení.', + 'is_live': True, }, 'params': { # m3u8 download 'skip_download': True, }, }, { - # live stream + # another 'url': 'http://www.ceskatelevize.cz/ivysilani/zive/ct4/', + 'only_matching': True, 'info_dict': { 'id': 402, 'ext': 'mp4', 'title': r're:^ČT Sport \d{4}-\d{2}-\d{2} \d{2}:\d{2}$', 'is_live': True, }, + # 'skip': 'Georestricted to Czech Republic', + }, { + 'url': 'http://www.ceskatelevize.cz/ivysilani/embed/iFramePlayer.php?hash=d6a3e1370d2e4fa76296b90bad4dfc19673b641e&IDEC=217 562 22150/0004&channelID=1&width=100%25', + 'only_matching': True, + }, { + # video with 18+ caution trailer + 'url': 'http://www.ceskatelevize.cz/porady/10520528904-queer/215562210900007-bogotart/', + 'info_dict': { + 'id': '215562210900007-bogotart', + 'title': 'Bogotart - Queer', + 'description': 'Hlavní město Kolumbie v doprovodu queer umělců. Vroucí svět plný vášně, sebevědomí, ale i násilí a bolesti', + }, + 'playlist': [{ + 'info_dict': { + 'id': '61924494877311053', + 'ext': 'mp4', + 'title': 'Bogotart - Queer (Varování 18+)', + 'duration': 11.9, + }, + }, { + 'info_dict': { + 'id': '61924494877068022', + 'ext': 'mp4', + 'title': 'Bogotart - Queer (Queer)', + 'thumbnail': r're:^https?://.*\.jpg', + 'duration': 1558.3, + }, + }], 'params': { # m3u8 download 'skip_download': True, }, - 'skip': 'Georestricted to Czech Republic', }, { - 'url': 'http://www.ceskatelevize.cz/ivysilani/embed/iFramePlayer.php?hash=d6a3e1370d2e4fa76296b90bad4dfc19673b641e&IDEC=217 562 22150/0004&channelID=1&width=100%25', + # iframe embed + 'url': 'http://www.ceskatelevize.cz/porady/10614999031-neviditelni/21251212048/', 'only_matching': True, }] + def _search_nextjs_data(self, webpage, video_id, **kw): + return self._parse_json( + self._search_regex( + r'(?s)<script[^>]+id=[\'"]__NEXT_DATA__[\'"][^>]*>([^<]+)</script>', + webpage, 'next.js data', **kw), + video_id, **kw) + def _real_extract(self, url): playlist_id = self._match_id(url) - - webpage = self._download_webpage(url, playlist_id) + webpage, urlh = self._download_webpage_handle(url, playlist_id) + parsed_url = compat_urllib_parse_urlparse(urlh.geturl()) + site_name = self._og_search_property('site_name', webpage, fatal=False, default='Česká televize') + playlist_title = self._og_search_title(webpage, default=None) + if site_name and playlist_title: + playlist_title = re.split(r'\s*[—|]\s*%s' % (site_name, ), playlist_title, 1)[0] + playlist_description = self._og_search_description(webpage, default=None) + if playlist_description: + playlist_description = playlist_description.replace('\xa0', ' ') + + type_ = 'IDEC' + if re.search(r'(^/porady|/zive)/', parsed_url.path): + next_data = self._search_nextjs_data(webpage, playlist_id) + if '/zive/' in parsed_url.path: + idec = traverse_obj(next_data, ('props', 'pageProps', 'data', 'liveBroadcast', 'current', 'idec'), get_all=False) + else: + idec = traverse_obj(next_data, ('props', 'pageProps', 'data', ('show', 'mediaMeta'), 'idec'), get_all=False) + if not idec: + idec = traverse_obj(next_data, ('props', 'pageProps', 'data', 'videobonusDetail', 'bonusId'), get_all=False) + if idec: + type_ = 'bonus' + if not idec: + raise ExtractorError('Failed to find IDEC id') + iframe_hash = self._download_webpage( + 'https://www.ceskatelevize.cz/v-api/iframe-hash/', + playlist_id, note='Getting IFRAME hash') + query = {'hash': iframe_hash, 'origin': 'iVysilani', 'autoStart': 'true', type_: idec, } + webpage = self._download_webpage( + 'https://www.ceskatelevize.cz/ivysilani/embed/iFramePlayer.php', + playlist_id, note='Downloading player', query=query) NOT_AVAILABLE_STRING = 'This content is not available at your territory due to limited copyright.' if '%s</p>' % NOT_AVAILABLE_STRING in webpage: - raise ExtractorError(NOT_AVAILABLE_STRING, expected=True) + self.raise_geo_restricted(NOT_AVAILABLE_STRING) + if any(not_found in webpage for not_found in ('Neplatný parametr pro videopřehrávač', 'IDEC nebyl nalezen', )): + raise ExtractorError('no video with IDEC available', video_id=idec, expected=True) type_ = None episode_id = None @@ -100,7 +273,7 @@ class CeskaTelevizeIE(InfoExtractor): data = { 'playlist[0][type]': type_, 'playlist[0][id]': episode_id, - 'requestUrl': compat_urllib_parse_urlparse(url).path, + 'requestUrl': parsed_url.path, 'requestSource': 'iVysilani', } @@ -108,7 +281,7 @@ class CeskaTelevizeIE(InfoExtractor): for user_agent in (None, USER_AGENTS['Safari']): req = sanitized_Request( - 'https://www.ceskatelevize.cz/ivysilani/ajax/get-client-playlist', + 'https://www.ceskatelevize.cz/ivysilani/ajax/get-client-playlist/', data=urlencode_postdata(data)) req.add_header('Content-type', 'application/x-www-form-urlencoded') @@ -130,9 +303,6 @@ class CeskaTelevizeIE(InfoExtractor): req = sanitized_Request(compat_urllib_parse_unquote(playlist_url)) req.add_header('Referer', url) - playlist_title = self._og_search_title(webpage, default=None) - playlist_description = self._og_search_description(webpage, default=None) - playlist = self._download_json(req, playlist_id, fatal=False) if not playlist: continue @@ -167,7 +337,7 @@ class CeskaTelevizeIE(InfoExtractor): entries[num]['formats'].extend(formats) continue - item_id = item.get('id') or item['assetId'] + item_id = str_or_none(item.get('id') or item['assetId']) title = item['title'] duration = float_or_none(item.get('duration')) @@ -181,8 +351,6 @@ class CeskaTelevizeIE(InfoExtractor): if playlist_len == 1: final_title = playlist_title or title - if is_live: - final_title = self._live_title(final_title) else: final_title = '%s (%s)' % (playlist_title, title) @@ -200,6 +368,8 @@ class CeskaTelevizeIE(InfoExtractor): for e in entries: self._sort_formats(e['formats']) + if len(entries) == 1: + return entries[0] return self.playlist_result(entries, playlist_id, playlist_title, playlist_description) def _get_subtitles(self, episode_id, subs): @@ -236,54 +406,3 @@ class CeskaTelevizeIE(InfoExtractor): yield line return '\r\n'.join(_fix_subtitle(subtitles)) - - -class CeskaTelevizePoradyIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?ceskatelevize\.cz/porady/(?:[^/?#&]+/)*(?P<id>[^/#?]+)' - _TESTS = [{ - # video with 18+ caution trailer - 'url': 'http://www.ceskatelevize.cz/porady/10520528904-queer/215562210900007-bogotart/', - 'info_dict': { - 'id': '215562210900007-bogotart', - 'title': 'Queer: Bogotart', - 'description': 'Alternativní průvodce současným queer světem', - }, - 'playlist': [{ - 'info_dict': { - 'id': '61924494876844842', - 'ext': 'mp4', - 'title': 'Queer: Bogotart (Varování 18+)', - 'duration': 10.2, - }, - }, { - 'info_dict': { - 'id': '61924494877068022', - 'ext': 'mp4', - 'title': 'Queer: Bogotart (Queer)', - 'thumbnail': r're:^https?://.*\.jpg', - 'duration': 1558.3, - }, - }], - 'params': { - # m3u8 download - 'skip_download': True, - }, - }, { - # iframe embed - 'url': 'http://www.ceskatelevize.cz/porady/10614999031-neviditelni/21251212048/', - 'only_matching': True, - }] - - def _real_extract(self, url): - video_id = self._match_id(url) - - webpage = self._download_webpage(url, video_id) - - data_url = update_url_query(unescapeHTML(self._search_regex( - (r'<span[^>]*\bdata-url=(["\'])(?P<url>(?:(?!\1).)+)\1', - r'<iframe[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//(?:www\.)?ceskatelevize\.cz/ivysilani/embed/iFramePlayer\.php.*?)\1'), - webpage, 'iframe player url', group='url')), query={ - 'autoStart': 'true', - }) - - return self.url_result(data_url, ie=CeskaTelevizeIE.ie_key()) --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -202,10 +202,7 @@ from .ccc import ( from .ccma import CCMAIE from .cctv import CCTVIE from .cda import CDAIE -from .ceskatelevize import ( - CeskaTelevizeIE, - CeskaTelevizePoradyIE, -) +from .ceskatelevize import CeskaTelevizeIE from .channel9 import Channel9IE from .charlierose import CharlieRoseIE from .chaturbate import ChaturbateIE
Locations
Projects
Search
Status Monitor
Help
OpenBuildService.org
Documentation
API Documentation
Code of Conduct
Contact
Support
@OBShq
Terms
openSUSE Build Service is sponsored by
The Open Build Service is an
openSUSE project
.
Sign Up
Log In
Places
Places
All Projects
Status Monitor