From 3fe103b6b722269e1094336a8578edd320880177 Mon Sep 17 00:00:00 2001
From: garret
Date: Wed, 12 Jul 2023 18:25:45 +0100
Subject: switch to unix line endings

really should have done this at the start but oh well
---
 walker.py | 242 +++++++++++++++++++++++++++++++-------------------------------
 1 file changed, 121 insertions(+), 121 deletions(-)

diff --git a/walker.py b/walker.py
index 2573725..ecd8209 100644
--- a/walker.py
+++ b/walker.py
@@ -1,121 +1,121 @@
-import argparse
-import json
-import logging
-import os
-import re
-import requests
-import sys
-import urllib.parse
-
-from binascii import unhexlify
-
-parser = argparse.ArgumentParser()
-parser.add_argument('cid', help='content id, chapter URL, or series URL')
-parser.add_argument('-v', '--verbose', help='log more', action="store_true")
-args = parser.parse_args()
-
-logging.basicConfig(format='[%(levelname)s] %(asctime)s: %(message)s', datefmt='%Y-%m-%d %H:%M:%S', level=logging.INFO)
-
-def start(url, headers):
-    meta = requests.get(url=url, headers=headers).json()
-    img_url = f'{url}/frames?enable_webp=true'
-
-    try:
-        cid_info = {
-            "TITLE": meta['data']['extra']['content']['title'],
-            "CHAPTER": meta['data']['result']['title']
-        }
-
-    except KeyError:
-        logging.error("Metadata malformed, check CID's validity")
-        sys.exit()
-
-    else:
-        print('{} - {}'.format(cid_info['TITLE'], cid_info['CHAPTER']))
-
-    undrm(img_url, headers, cid_info)
-
-def undrm(url, headers, cid_info):
-    meta = requests.get(url=url, headers=headers).json()
-
-    print('Page count: {}'.format(len(meta['data']['result'])))
-
-    save_path = os.path.join('downloaded_chapters/{}/{}'.format(cid_info['TITLE'], cid_info['CHAPTER']))
-
-    if not os.path.exists(save_path):
-        os.makedirs(save_path)
-
-    print(f'Saving chapter to {save_path}')
-
-    for num, page in enumerate(meta['data']['result']):
-
-        if args.verbose:
-            logging.info(f'Downloading page {num+1}')
-
-        key = page['meta']['drm_hash']
-        file = requests.get(page['meta']['source_url'], headers=headers).content
-        pagination = str(num + 1) + '.webp'
-
-        if key is not None:
-            key = unhexlify(key[:16])
-            file = xor(file, key)
-
-        with open(f'{save_path}/{pagination}', 'wb') as f:
-            f.write(file)
-
-def xor(bin, key):
-    retval = []
-
-    for idx, val in enumerate(bin):
-        retval.append(val ^ key[idx % len(key)])
-
-    return bytes(retval)
-
-def get_cid_query(url):
-    u = urllib.parse.urlparse(url)
-    qs = urllib.parse.parse_qs(u.query)
-    return qs["cid"][0]
-
-def extract_cid(cid):
-    if cid.startswith("http"):
-        # have been given an url, lets extract the cid from it
-        if 'contents' in cid:
-            # this is a whole-series page
-            page = requests.get(cid)
-            urls = re.findall(r"<a[^>]*href=['\"](?P<url>[^'\"]+)['\"][^>]*'backnumber'", page.text)
-            # the links to the chapters always have an onclick arg that includes 'backnumber'
-            return [get_cid_query(i) for i in urls]
-        elif 'viewer' in cid:
-            # this is a chapter page
-            return [get_cid_query(cid)]
-
-    # otherwise probably a raw cid
-    return [cid]
-
-def main():
-
-    headers = {
-        'authority': 'comicwalker-api.nicomanga.jp',
-        'accept': '*/*',
-        'accept-language': 'en-US,en;q=0.9',
-        'cache-control': 'no-cache',
-        'origin': 'https://comic-walker.com',
-        'pragma': 'no-cache',
-        'referer': 'https://comic-walker.com/',
-        'sec-ch-ua': '" Not A;Brand";v="99", "Chromium";v="100", "Google Chrome";v="100"',
-        'sec-ch-ua-mobile': '?0',
-        'sec-ch-ua-platform': '"Windows"',
-        'sec-fetch-dest': 'empty',
-        'sec-fetch-mode': 'cors',
-        'sec-fetch-site': 'cross-site',
-        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.127 Safari/537.36',
-    }
-
-    cids = extract_cid(args.cid)
-    for cid in cids:
-        content_url = f'https://comicwalker-api.nicomanga.jp/api/v1/comicwalker/episodes/{cid}'
-        start(content_url, headers)
-
-
-if __name__ == "__main__":
-    main()
+import argparse
+import json
+import logging
+import os
+import re
+import requests
+import sys
+import urllib.parse
+
+from binascii import unhexlify
+
+parser = argparse.ArgumentParser()
+parser.add_argument('cid', help='content id, chapter URL, or series URL')
+parser.add_argument('-v', '--verbose', help='log more', action="store_true")
+args = parser.parse_args()
+
+logging.basicConfig(format='[%(levelname)s] %(asctime)s: %(message)s', datefmt='%Y-%m-%d %H:%M:%S', level=logging.INFO)
+
+def start(url, headers):
+    meta = requests.get(url=url, headers=headers).json()
+    img_url = f'{url}/frames?enable_webp=true'
+
+    try:
+        cid_info = {
+            "TITLE": meta['data']['extra']['content']['title'],
+            "CHAPTER": meta['data']['result']['title']
+        }
+
+    except KeyError:
+        logging.error("Metadata malformed, check CID's validity")
+        sys.exit()
+
+    else:
+        print('{} - {}'.format(cid_info['TITLE'], cid_info['CHAPTER']))
+
+    undrm(img_url, headers, cid_info)
+
+def undrm(url, headers, cid_info):
+    meta = requests.get(url=url, headers=headers).json()
+
+    print('Page count: {}'.format(len(meta['data']['result'])))
+
+    save_path = os.path.join('downloaded_chapters/{}/{}'.format(cid_info['TITLE'], cid_info['CHAPTER']))
+
+    if not os.path.exists(save_path):
+        os.makedirs(save_path)
+
+    print(f'Saving chapter to {save_path}')
+
+    for num, page in enumerate(meta['data']['result']):
+
+        if args.verbose:
+            logging.info(f'Downloading page {num+1}')
+
+        key = page['meta']['drm_hash']
+        file = requests.get(page['meta']['source_url'], headers=headers).content
+        pagination = str(num + 1) + '.webp'
+
+        if key is not None:
+            key = unhexlify(key[:16])
+            file = xor(file, key)
+
+        with open(f'{save_path}/{pagination}', 'wb') as f:
+            f.write(file)
+
+def xor(bin, key):
+    retval = []
+
+    for idx, val in enumerate(bin):
+        retval.append(val ^ key[idx % len(key)])
+
+    return bytes(retval)
+
+def get_cid_query(url):
+    u = urllib.parse.urlparse(url)
+    qs = urllib.parse.parse_qs(u.query)
+    return qs["cid"][0]
+
+def extract_cid(cid):
+    if cid.startswith("http"):
+        # have been given an url, lets extract the cid from it
+        if 'contents' in cid:
+            # this is a whole-series page
+            page = requests.get(cid)
+            urls = re.findall(r"<a[^>]*href=['\"](?P<url>[^'\"]+)['\"][^>]*'backnumber'", page.text)
+            # the links to the chapters always have an onclick arg that includes 'backnumber'
+            return [get_cid_query(i) for i in urls]
+        elif 'viewer' in cid:
+            # this is a chapter page
+            return [get_cid_query(cid)]
+
+    # otherwise probably a raw cid
+    return [cid]
+
+def main():
+
+    headers = {
+        'authority': 'comicwalker-api.nicomanga.jp',
+        'accept': '*/*',
+        'accept-language': 'en-US,en;q=0.9',
+        'cache-control': 'no-cache',
+        'origin': 'https://comic-walker.com',
+        'pragma': 'no-cache',
+        'referer': 'https://comic-walker.com/',
+        'sec-ch-ua': '" Not A;Brand";v="99", "Chromium";v="100", "Google Chrome";v="100"',
+        'sec-ch-ua-mobile': '?0',
+        'sec-ch-ua-platform': '"Windows"',
+        'sec-fetch-dest': 'empty',
+        'sec-fetch-mode': 'cors',
+        'sec-fetch-site': 'cross-site',
+        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.127 Safari/537.36',
+    }
+
+    cids = extract_cid(args.cid)
+    for cid in cids:
+        content_url = f'https://comicwalker-api.nicomanga.jp/api/v1/comicwalker/episodes/{cid}'
+        start(content_url, headers)
+
+
+if __name__ == "__main__":
+    main()
-- 
cgit v1.2.3-70-g09d2