about summary refs log tree commit diff stats
path: root/walker.py
diff options
context:
space:
mode:
author garret <garret@airmail.cc> 2023-07-12 18:25:45 +0100
committer garret <garret@airmail.cc> 2023-07-13 08:07:11 +0100
commit 3fe103b6b722269e1094336a8578edd320880177 (patch)
tree 063c225bca99eb6ccfe5b90a809bb6a091ad2fdd /walker.py
parent 1174cd7fcd5693eea26c42f299cb2d6168a7c08a (diff)
download comicwalker-dl-3fe103b6b722269e1094336a8578edd320880177.tar.gz
comicwalker-dl-3fe103b6b722269e1094336a8578edd320880177.tar.bz2
comicwalker-dl-3fe103b6b722269e1094336a8578edd320880177.zip
switch to unix line endings
really should have done this at the start but oh well
Diffstat (limited to 'walker.py')
-rw-r--r-- walker.py 242
1 files changed, 121 insertions, 121 deletions
diff --git a/walker.py b/walker.py
index 2573725..ecd8209 100644
--- a/walker.py
+++ b/walker.py
@@ -1,121 +1,121 @@
-import argparse
-import json
-import logging
-import os
-import re
-import requests
-import sys
-import urllib.parse
-
-from binascii import unhexlify
-
-parser = argparse.ArgumentParser()
-parser.add_argument('cid', help='content id, chapter URL, or series URL')
-parser.add_argument('-v', '--verbose', help='log more', action="store_true")
-args = parser.parse_args()
-
-logging.basicConfig(format='[%(levelname)s] %(asctime)s: %(message)s', datefmt='%Y-%m-%d %H:%M:%S', level=logging.INFO)
-
-def start(url, headers):
- meta = requests.get(url=url, headers=headers).json()
- img_url = f'{url}/frames?enable_webp=true'
-
- try:
- cid_info = {
- "TITLE": meta['data']['extra']['content']['title'],
- "CHAPTER": meta['data']['result']['title']
- }
-
- except KeyError:
- logging.error("Metadata malformed, check CID's validity")
- sys.exit()
-
- else:
- print('{} - {}'.format(cid_info['TITLE'], cid_info['CHAPTER']))
-
- undrm(img_url, headers, cid_info)
-
-def undrm(url, headers, cid_info):
- meta = requests.get(url=url, headers=headers).json()
-
- print('Page count: {}'.format(len(meta['data']['result'])))
-
- save_path = os.path.join('downloaded_chapters/{}/{}'.format(cid_info['TITLE'], cid_info['CHAPTER']))
-
- if not os.path.exists(save_path):
- os.makedirs(save_path)
-
- print(f'Saving chapter to {save_path}')
-
- for num, page in enumerate(meta['data']['result']):
-
- if args.verbose:
- logging.info(f'Downloading page {num+1}')
-
- key = page['meta']['drm_hash']
- file = requests.get(page['meta']['source_url'], headers=headers).content
- pagination = str(num + 1) + '.webp'
-
- if key is not None:
- key = unhexlify(key[:16])
- file = xor(file, key)
-
- with open(f'{save_path}/{pagination}', 'wb') as f:
- f.write(file)
-
-def xor(bin, key):
- retval = []
-
- for idx, val in enumerate(bin):
- retval.append(val ^ key[idx % len(key)])
-
- return bytes(retval)
-
-def get_cid_query(url):
- u = urllib.parse.urlparse(url)
- qs = urllib.parse.parse_qs(u.query)
- return qs["cid"][0]
-
-def extract_cid(cid):
- if cid.startswith("http"):
- # have been given an url, lets extract the cid from it
- if 'contents' in cid:
- # this is a whole-series page
- page = requests.get(cid)
- urls = re.findall(r"<a [^>]*href=['\"](?P<url>[^'\"]+)['\"][^>]*'backnumber'", page.text)
- # the links to the chapters always have an onclick arg that includes 'backnumber'
- return [get_cid_query(i) for i in urls]
- elif 'viewer' in cid:
- # this is a chapter page
- return [get_cid_query(cid)]
-
- # otherwise probably a raw cid
- return [cid]
-
-def main():
-
- headers = {
- 'authority': 'comicwalker-api.nicomanga.jp',
- 'accept': '*/*',
- 'accept-language': 'en-US,en;q=0.9',
- 'cache-control': 'no-cache',
- 'origin': 'https://comic-walker.com',
- 'pragma': 'no-cache',
- 'referer': 'https://comic-walker.com/',
- 'sec-ch-ua': '" Not A;Brand";v="99", "Chromium";v="100", "Google Chrome";v="100"',
- 'sec-ch-ua-mobile': '?0',
- 'sec-ch-ua-platform': '"Windows"',
- 'sec-fetch-Blowfisht': 'empty',
- 'sec-fetch-mode': 'cors',
- 'sec-fetch-site': 'cross-site',
- 'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.127 Safari/537.36',
- }
-
- cids = extract_cid(args.cid)
- for cid in cids:
- content_url = f'https://comicwalker-api.nicomanga.jp/api/v1/comicwalker/episodes/{cid}'
- start(content_url, headers)
-
-
-if __name__ == "__main__":
- main()
+import argparse
+import json
+import logging
+import os
+import re
+import requests
+import sys
+import urllib.parse
+
+from binascii import unhexlify
+
+parser = argparse.ArgumentParser()
+parser.add_argument('cid', help='content id, chapter URL, or series URL')
+parser.add_argument('-v', '--verbose', help='log more', action="store_true")
+args = parser.parse_args()
+
+logging.basicConfig(format='[%(levelname)s] %(asctime)s: %(message)s', datefmt='%Y-%m-%d %H:%M:%S', level=logging.INFO)
+
+def start(url, headers):
+ meta = requests.get(url=url, headers=headers).json()
+ img_url = f'{url}/frames?enable_webp=true'
+
+ try:
+ cid_info = {
+ "TITLE": meta['data']['extra']['content']['title'],
+ "CHAPTER": meta['data']['result']['title']
+ }
+
+ except KeyError:
+ logging.error("Metadata malformed, check CID's validity")
+ sys.exit()
+
+ else:
+ print('{} - {}'.format(cid_info['TITLE'], cid_info['CHAPTER']))
+
+ undrm(img_url, headers, cid_info)
+
+def undrm(url, headers, cid_info):
+ meta = requests.get(url=url, headers=headers).json()
+
+ print('Page count: {}'.format(len(meta['data']['result'])))
+
+ save_path = os.path.join('downloaded_chapters/{}/{}'.format(cid_info['TITLE'], cid_info['CHAPTER']))
+
+ if not os.path.exists(save_path):
+ os.makedirs(save_path)
+
+ print(f'Saving chapter to {save_path}')
+
+ for num, page in enumerate(meta['data']['result']):
+
+ if args.verbose:
+ logging.info(f'Downloading page {num+1}')
+
+ key = page['meta']['drm_hash']
+ file = requests.get(page['meta']['source_url'], headers=headers).content
+ pagination = str(num + 1) + '.webp'
+
+ if key is not None:
+ key = unhexlify(key[:16])
+ file = xor(file, key)
+
+ with open(f'{save_path}/{pagination}', 'wb') as f:
+ f.write(file)
+
+def xor(bin, key):
+ retval = []
+
+ for idx, val in enumerate(bin):
+ retval.append(val ^ key[idx % len(key)])
+
+ return bytes(retval)
+
+def get_cid_query(url):
+ u = urllib.parse.urlparse(url)
+ qs = urllib.parse.parse_qs(u.query)
+ return qs["cid"][0]
+
+def extract_cid(cid):
+ if cid.startswith("http"):
+ # have been given an url, lets extract the cid from it
+ if 'contents' in cid:
+ # this is a whole-series page
+ page = requests.get(cid)
+ urls = re.findall(r"<a [^>]*href=['\"](?P<url>[^'\"]+)['\"][^>]*'backnumber'", page.text)
+ # the links to the chapters always have an onclick arg that includes 'backnumber'
+ return [get_cid_query(i) for i in urls]
+ elif 'viewer' in cid:
+ # this is a chapter page
+ return [get_cid_query(cid)]
+
+ # otherwise probably a raw cid
+ return [cid]
+
+def main():
+
+ headers = {
+ 'authority': 'comicwalker-api.nicomanga.jp',
+ 'accept': '*/*',
+ 'accept-language': 'en-US,en;q=0.9',
+ 'cache-control': 'no-cache',
+ 'origin': 'https://comic-walker.com',
+ 'pragma': 'no-cache',
+ 'referer': 'https://comic-walker.com/',
+ 'sec-ch-ua': '" Not A;Brand";v="99", "Chromium";v="100", "Google Chrome";v="100"',
+ 'sec-ch-ua-mobile': '?0',
+ 'sec-ch-ua-platform': '"Windows"',
+ 'sec-fetch-Blowfisht': 'empty',
+ 'sec-fetch-mode': 'cors',
+ 'sec-fetch-site': 'cross-site',
+ 'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.127 Safari/537.36',
+ }
+
+ cids = extract_cid(args.cid)
+ for cid in cids:
+ content_url = f'https://comicwalker-api.nicomanga.jp/api/v1/comicwalker/episodes/{cid}'
+ start(content_url, headers)
+
+
+if __name__ == "__main__":
+ main()