feat: request cache and request lock

pull/8/head
WorldObservationLog 5 months ago
parent 43fb4679a7
commit 43333c6631
  1. 13
      poetry.lock
  2. 1
      pyproject.toml
  3. 76
      src/api.py
  4. 9
      src/mp4.py

13
poetry.lock generated

@ -87,6 +87,17 @@ files = [
[package.extras]
tests = ["mypy (>=0.800)", "pytest", "pytest-asyncio"]
[[package]]
name = "async-lru"
version = "2.0.4"
description = "Simple LRU cache for asyncio"
optional = false
python-versions = ">=3.8"
files = [
{file = "async-lru-2.0.4.tar.gz", hash = "sha256:b8a59a5df60805ff63220b2a0c5b5393da5521b113cd5465a44eb037d81a5627"},
{file = "async_lru-2.0.4-py3-none-any.whl", hash = "sha256:ff02944ce3c288c5be660c42dbcca0742b32c3b279d6dceda655190240b99224"},
]
[[package]]
name = "attrs"
version = "23.2.0"
@ -1758,4 +1769,4 @@ cffi = ["cffi (>=1.11)"]
[metadata]
lock-version = "2.0"
python-versions = "^3.11"
content-hash = "cde2194f4da1bd7402b4ffaed566937745f18e0758784876a62b1914faec9c00"
content-hash = "6eb629b08089983b830419b9b143f4ebcbb7e824ed4110cc3a4aba25e13e5df7"

@ -21,6 +21,7 @@ frida = "^16.2.1"
tenacity = "^8.2.3"
prompt-toolkit = "^3.0.43"
mitmproxy = "^10.3.0"
async-lru = "^2.0.4"
[build-system]
requires = ["poetry-core"]

@ -5,6 +5,7 @@ from ssl import SSLError
import httpcore
import httpx
import regex
from async_lru import alru_cache
from loguru import logger
from tenacity import retry, retry_if_exception_type, stop_after_attempt, before_sleep_log
@ -12,22 +13,25 @@ from src.models import *
from src.models.song_data import Datum
client: httpx.AsyncClient
lock: asyncio.Semaphore
download_lock: asyncio.Semaphore
request_lock: asyncio.Semaphore
user_agent_browser = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
user_agent_itunes = "iTunes/12.11.3 (Windows; Microsoft Windows 10 x64 Professional Edition (Build 19041); x64) AppleWebKit/7611.1022.4001.1 (dt:2)"
user_agent_app = "Music/5.7 Android/10 model/Pixel6GR1YH build/1234 (dt:66)"
def init_client_and_lock(proxy: str, parallel_num: int, request_parallel_num: int = 64):
    """Create the shared HTTP client and the semaphores that throttle it.

    Rebinds the module-level ``client``, ``lock``, ``download_lock`` and
    ``request_lock`` globals. Those names are only annotated at module level,
    so this function has to run before any coroutine here touches them.

    Args:
        proxy: Proxy URL handed to ``httpx.AsyncClient``. A falsy value
            builds a client without a proxy.
        parallel_num: Number of permits for the download semaphores.
        request_parallel_num: Number of permits for ``request_lock``.
            Defaults to 64, the limit that was previously hard-coded.
    """
    global client, lock, download_lock, request_lock
    if proxy:
        client = httpx.AsyncClient(proxy=proxy)
    else:
        client = httpx.AsyncClient()
    # ``lock`` and ``download_lock`` are both declared at module level; each
    # one gets its own semaphore sized by ``parallel_num``.
    lock = asyncio.Semaphore(parallel_num)
    download_lock = asyncio.Semaphore(parallel_num)
    request_lock = asyncio.Semaphore(request_parallel_num)
async def get_m3u8_from_api(endpoint: str, song_id: str) -> str:
async with request_lock:
resp = (await client.get(endpoint, params={"songid": song_id})).text
if resp == "no_found":
return ""
@ -35,6 +39,7 @@ async def get_m3u8_from_api(endpoint: str, song_id: str) -> str:
async def upload_m3u8_to_api(endpoint: str, m3u8_url: str, song_info: Datum):
async with request_lock:
await client.post(endpoint, json={
"method": "add_m3u8",
"params": {
@ -51,6 +56,7 @@ async def upload_m3u8_to_api(endpoint: str, m3u8_url: str, song_info: Datum):
stop=stop_after_attempt(5),
before_sleep=before_sleep_log(logger, logging.WARN))
async def get_token():
async with request_lock:
req = await client.get("https://beta.music.apple.com")
index_js_uri = regex.findall(r"/assets/index-legacy-[^/]+\.js", req.text)[0]
js_req = await client.get("https://beta.music.apple.com" + index_js_uri)
@ -58,18 +64,23 @@ async def get_token():
return token
@alru_cache
@retry(retry=retry_if_exception_type((httpx.TimeoutException, httpcore.ConnectError, SSLError, FileNotFoundError,
                                      httpcore.RemoteProtocolError)),
       stop=stop_after_attempt(5),
       before_sleep=before_sleep_log(logger, logging.WARNING))
async def download_song(url: str) -> bytes:
    """Fetch ``url`` with the shared client and return the response body.

    The request runs while holding ``download_lock``. The retry policy stops
    after five attempts and only retries the exception types listed in the
    decorator, logging a warning before each sleep. ``alru_cache`` is the
    outermost decorator, so it wraps the retrying coroutine.

    NOTE(review): the cache holds complete response bodies in memory;
    consider passing an explicit ``maxsize`` if payloads are large.

    Args:
        url: Address of the resource to download.

    Returns:
        The raw response body.
    """
    async with download_lock:
        response = await client.get(url)
    return response.content
@retry(retry=retry_if_exception_type((httpx.TimeoutException, httpcore.ConnectError, SSLError, FileNotFoundError)),
@alru_cache
@retry(retry=retry_if_exception_type(
(httpx.TimeoutException, httpcore.ConnectError, SSLError, FileNotFoundError, httpcore.RemoteProtocolError)),
stop=stop_after_attempt(5),
before_sleep=before_sleep_log(logger, logging.WARN))
async def get_album_info(album_id: str, token: str, storefront: str, lang: str):
async with request_lock:
req = await client.get(f"https://amp-api.music.apple.com/v1/catalog/{storefront}/albums/{album_id}",
params={"omit[resource]": "autos", "include": "tracks,artists,record-labels",
"include[songs]": "artists", "fields[artists]": "name",
@ -80,10 +91,13 @@ async def get_album_info(album_id: str, token: str, storefront: str, lang: str):
return AlbumMeta.model_validate(req.json())
@retry(retry=retry_if_exception_type((httpx.TimeoutException, httpcore.ConnectError, SSLError, FileNotFoundError)),
@alru_cache
@retry(retry=retry_if_exception_type(
(httpx.TimeoutException, httpcore.ConnectError, SSLError, FileNotFoundError, httpcore.RemoteProtocolError)),
stop=stop_after_attempt(5),
before_sleep=before_sleep_log(logger, logging.WARN))
async def get_playlist_info_and_tracks(playlist_id: str, token: str, storefront: str, lang: str):
async with request_lock:
resp = await client.get(f"https://amp-api.music.apple.com/v1/catalog/{storefront}/playlists/{playlist_id}",
params={"l": lang},
headers={"Authorization": f"Bearer {token}", "User-Agent": user_agent_browser,
@ -95,11 +109,15 @@ async def get_playlist_info_and_tracks(playlist_id: str, token: str, storefront:
return playlist_info_obj
@retry(retry=retry_if_exception_type((httpx.TimeoutException, httpcore.ConnectError, SSLError, FileNotFoundError)),
@alru_cache
@retry(retry=retry_if_exception_type(
(httpx.TimeoutException, httpcore.ConnectError, SSLError, FileNotFoundError, httpcore.RemoteProtocolError)),
stop=stop_after_attempt(5),
before_sleep=before_sleep_log(logger, logging.WARN))
async def get_playlist_tracks(playlist_id: str, token: str, storefront: str, lang: str, offset: int = 0):
resp = await client.get(f"https://amp-api.music.apple.com/v1/catalog/{storefront}/playlists/{playlist_id}/tracks",
async with request_lock:
resp = await client.get(
f"https://amp-api.music.apple.com/v1/catalog/{storefront}/playlists/{playlist_id}/tracks",
params={"l": lang, "offset": offset},
headers={"Authorization": f"Bearer {token}", "User-Agent": user_agent_browser,
"Origin": "https://music.apple.com"})
@ -111,20 +129,26 @@ async def get_playlist_tracks(playlist_id: str, token: str, storefront: str, lan
return tracks
@alru_cache
@retry(retry=retry_if_exception_type(
    (httpx.TimeoutException, httpcore.ConnectError, SSLError, FileNotFoundError, httpcore.RemoteProtocolError)),
    stop=stop_after_attempt(5),
    before_sleep=before_sleep_log(logger, logging.WARNING))
async def get_cover(url: str, cover_format: str, cover_size: str) -> bytes:
    """Download cover artwork in the requested format and size.

    Args:
        url: Artwork URL template containing ``bb.jpg`` and the ``{w}x{h}``
            size placeholder.
        cover_format: File extension substituted for ``jpg``.
        cover_size: Text substituted for the ``{w}x{h}`` placeholder.

    Returns:
        The raw response body.
    """
    # Literal replacement on purpose: as a regex pattern, 'bb.jpg' let "."
    # match any character, and the replacement text was parsed as a regex
    # template (backslashes / group references in cover_format).
    cover_url = url.replace("bb.jpg", f"bb.{cover_format}").replace("{w}x{h}", cover_size)
    # Only the network round trip needs a request_lock permit.
    async with request_lock:
        response = await client.get(cover_url, headers={"User-Agent": user_agent_browser})
    return response.content
@retry(retry=retry_if_exception_type((httpx.TimeoutException, httpcore.ConnectError, SSLError, FileNotFoundError)),
@alru_cache
@retry(retry=retry_if_exception_type(
(httpx.TimeoutException, httpcore.ConnectError, SSLError, FileNotFoundError, httpcore.RemoteProtocolError)),
stop=stop_after_attempt(5),
before_sleep=before_sleep_log(logger, logging.WARN))
async def get_song_info(song_id: str, token: str, storefront: str, lang: str):
async with request_lock:
req = await client.get(f"https://amp-api.music.apple.com/v1/catalog/{storefront}/songs/{song_id}",
params={"extend": "extendedAssetUrls", "include": "albums", "l": lang},
headers={"Authorization": f"Bearer {token}", "User-Agent": user_agent_itunes,
@ -136,10 +160,13 @@ async def get_song_info(song_id: str, token: str, storefront: str, lang: str):
return None
@retry(retry=retry_if_exception_type((httpx.TimeoutException, httpcore.ConnectError, SSLError, FileNotFoundError)),
@alru_cache
@retry(retry=retry_if_exception_type(
(httpx.TimeoutException, httpcore.ConnectError, SSLError, FileNotFoundError, httpcore.RemoteProtocolError)),
stop=stop_after_attempt(5),
before_sleep=before_sleep_log(logger, logging.WARN))
async def get_song_lyrics(song_id: str, storefront: str, token: str, dsid: str, account_token: str, lang: str) -> str:
async with request_lock:
req = await client.get(f"https://amp-api.music.apple.com/v1/catalog/{storefront}/songs/{song_id}/lyrics",
params={"l": lang},
headers={"Authorization": f"Bearer {token}", "User-Agent": user_agent_app,
@ -149,10 +176,13 @@ async def get_song_lyrics(song_id: str, storefront: str, token: str, dsid: str,
return result.data[0].attributes.ttml
@retry(retry=retry_if_exception_type((httpx.TimeoutException, httpcore.ConnectError, SSLError, FileNotFoundError)),
@alru_cache
@retry(retry=retry_if_exception_type(
(httpx.TimeoutException, httpcore.ConnectError, SSLError, FileNotFoundError, httpcore.RemoteProtocolError)),
stop=stop_after_attempt(5),
before_sleep=before_sleep_log(logger, logging.WARN))
async def get_albums_from_artist(artist_id: str, storefront: str, token: str, lang: str, offset: int = 0):
async with request_lock:
resp = await client.get(f"https://amp-api.music.apple.com/v1/catalog/{storefront}/artists/{artist_id}/albums",
params={"l": lang, "offset": offset},
headers={"Authorization": f"Bearer {token}", "User-Agent": user_agent_browser,
@ -165,10 +195,13 @@ async def get_albums_from_artist(artist_id: str, storefront: str, token: str, la
return list(set(albums))
@retry(retry=retry_if_exception_type((httpx.TimeoutException, httpcore.ConnectError, SSLError, FileNotFoundError)),
@alru_cache
@retry(retry=retry_if_exception_type(
(httpx.TimeoutException, httpcore.ConnectError, SSLError, FileNotFoundError, httpcore.RemoteProtocolError)),
stop=stop_after_attempt(5),
before_sleep=before_sleep_log(logger, logging.WARN))
async def get_songs_from_artist(artist_id: str, storefront: str, token: str, lang: str, offset: int = 0):
async with request_lock:
resp = await client.get(f"https://amp-api.music.apple.com/v1/catalog/{storefront}/artists/{artist_id}/songs",
params={"l": lang, "offset": offset},
headers={"Authorization": f"Bearer {token}", "User-Agent": user_agent_browser,
@ -181,12 +214,25 @@ async def get_songs_from_artist(artist_id: str, storefront: str, token: str, lan
return list[set(songs)]
@alru_cache
@retry(retry=retry_if_exception_type(
    (httpx.TimeoutException, httpcore.ConnectError, SSLError, FileNotFoundError, httpcore.RemoteProtocolError)),
    stop=stop_after_attempt(5),
    before_sleep=before_sleep_log(logger, logging.WARNING))
async def get_artist_info(artist_id: str, storefront: str, token: str, lang: str):
    """Fetch one artist's catalog entry and validate it into ``ArtistInfo``.

    Args:
        artist_id: Artist identifier placed in the request path.
        storefront: Storefront code placed in the request path.
        token: Bearer token sent in the ``Authorization`` header.
        lang: Value sent as the ``l`` query parameter.

    Returns:
        The ``ArtistInfo`` model built from the JSON response.
    """
    async with request_lock:
        resp = await client.get(
            f"https://amp-api.music.apple.com/v1/catalog/{storefront}/artists/{artist_id}",
            params={"l": lang},
            headers={"Authorization": f"Bearer {token}", "User-Agent": user_agent_browser,
                     "Origin": "https://music.apple.com"})
    # model_validate is the current Pydantic entry point and matches how
    # AlbumMeta is built elsewhere in this file; parse_obj is deprecated.
    return ArtistInfo.model_validate(resp.json())
@alru_cache
@retry(
    retry=retry_if_exception_type((
        httpx.TimeoutException,
        httpcore.ConnectError,
        SSLError,
        FileNotFoundError,
        httpcore.RemoteProtocolError,
    )),
    stop=stop_after_attempt(5),
    before_sleep=before_sleep_log(logger, logging.WARN),
)
async def download_m3u8(m3u8_url: str) -> str:
    """Fetch the playlist at ``m3u8_url`` and return its text.

    The request is made with the shared client while holding
    ``request_lock``. The retry policy stops after five attempts and only
    retries the exception types listed in the decorator.

    Args:
        m3u8_url: Address of the playlist to fetch.

    Returns:
        The response body decoded as text.
    """
    async with request_lock:
        playlist_response = await client.get(m3u8_url)
        return playlist_response.text

@ -1,16 +1,17 @@
import subprocess
import uuid
from datetime import datetime
from io import BytesIO
from pathlib import Path
from tempfile import TemporaryDirectory
from typing import Tuple
from datetime import datetime
import m3u8
import regex
from bs4 import BeautifulSoup
from loguru import logger
from src.api import download_m3u8
from src.exceptions import CodecNotFoundException
from src.metadata import SongMetadata
from src.types import *
@ -18,7 +19,7 @@ from src.utils import find_best_codec, get_codec_from_codec_id, get_suffix
async def get_available_codecs(m3u8_url: str) -> Tuple[list[str], list[str]]:
    """List the codecs offered by a master playlist.

    The playlist text is fetched with ``download_m3u8`` and parsed with
    ``m3u8.loads``. The earlier ``m3u8.load(m3u8_url)`` call is dropped
    because its result was immediately overwritten.

    Args:
        m3u8_url: Address of the master playlist.

    Returns:
        ``(codecs, codec_ids)``: the audio id of every variant playlist, and
        the result of ``get_codec_from_codec_id`` for each id, in the same
        order.
    """
    # uri is forwarded to the parser — presumably so relative URIs inside the
    # playlist resolve against m3u8_url; confirm against the m3u8 docs.
    parsed_m3u8 = m3u8.loads(await download_m3u8(m3u8_url), uri=m3u8_url)
    codec_ids = [playlist.stream_info.audio for playlist in parsed_m3u8.playlists]
    codecs = [get_codec_from_codec_id(codec_id) for codec_id in codec_ids]
    return codecs, codec_ids
@ -26,7 +27,7 @@ async def get_available_codecs(m3u8_url: str) -> Tuple[list[str], list[str]]:
async def extract_media(m3u8_url: str, codec: str, song_metadata: SongMetadata,
codec_priority: list[str], alternative_codec: bool = False) -> Tuple[str, list[str]]:
parsed_m3u8 = m3u8.load(m3u8_url)
parsed_m3u8 = m3u8.loads(await download_m3u8(m3u8_url), uri=m3u8_url)
specifyPlaylist = find_best_codec(parsed_m3u8, codec)
if not specifyPlaylist and alternative_codec:
logger.warning(f"Codec {codec} of song: {song_metadata.artist} - {song_metadata.title} did not found")
@ -39,7 +40,7 @@ async def extract_media(m3u8_url: str, codec: str, song_metadata: SongMetadata,
raise CodecNotFoundException
selected_codec = specifyPlaylist.media[0].group_id
logger.info(f"Selected codec: {selected_codec} for song: {song_metadata.artist} - {song_metadata.title}")
stream = m3u8.load(specifyPlaylist.absolute_uri)
stream = m3u8.loads(await download_m3u8(specifyPlaylist.absolute_uri), uri=specifyPlaylist.absolute_uri)
skds = [key.uri for key in stream.keys if regex.match('(skd?://[^"]*)', key.uri)]
keys = [prefetchKey]
key_suffix = CodecKeySuffix.KeySuffixDefault

Loading…
Cancel
Save