walker.py


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121

import argparse
import json
import logging
import os
import re
import requests
import sys
import urllib.parse

from binascii import unhexlify

parser = argparse.ArgumentParser()
parser.add_argument('cid', help='content id, chapter URL, or series URL')
parser.add_argument('-v', '--verbose', help='log more', action="store_true")
args = parser.parse_args()

logging.basicConfig(format='[%(levelname)s] %(asctime)s: %(message)s', datefmt='%Y-%m-%d %H:%M:%S', level=logging.INFO)

def start(url, headers):
    meta = requests.get(url=url, headers=headers).json()
    img_url = f'{url}/frames?enable_webp=true'

    try:
        cid_info = {
            "TITLE": meta['data']['extra']['content']['title'],
            "CHAPTER": meta['data']['result']['title']
        }

    except KeyError:
        logging.error("Metadata malformed, check CID's validity")
        sys.exit()

    else:
        print('{} - {}'.format(cid_info['TITLE'], cid_info['CHAPTER']))

        undrm(img_url, headers, cid_info)

def undrm(url, headers, cid_info):
    meta = requests.get(url=url, headers=headers).json()

    print('Page count: {}'.format(len(meta['data']['result'])))

    save_path = os.path.join('downloaded_chapters/{}/{}'.format(cid_info['TITLE'], cid_info['CHAPTER']))

    if not os.path.exists(save_path):
        os.makedirs(save_path)

    print(f'Saving chapter to {save_path}')

    for num, page in enumerate(meta['data']['result']):

        if args.verbose:
            logging.info(f'Downloading page {num+1}')

        key = page['meta']['drm_hash']
        file = requests.get(page['meta']['source_url'], headers=headers).content
        pagination = str(num + 1) + '.webp'

        if key is not None:
            key = unhexlify(key[:16])
            file = xor(file, key)

        with open(f'{save_path}/{pagination}', 'wb') as f:
            f.write(file)

def xor(bin, key):
    retval = []

    for idx, val in enumerate(bin):
        retval.append(val ^ key[idx % len(key)])

    return bytes(retval)

def get_cid_query(url):
    u = urllib.parse.urlparse(url)
    qs = urllib.parse.parse_qs(u.query)
    return qs["cid"][0]

def extract_cid(cid):
    if cid.startswith("http"):
        # have been given an url, lets extract the cid from it
        if 'contents' in cid:
            # this is a whole-series page
            page = requests.get(cid)
            urls = re.findall(r"<a [^>]*href=['\"](?P<url>[^'\"]+)['\"][^>]*'backnumber'", page.text)
                             # the links to the chapters always have an onclick arg that includes 'backnumber'
            return [get_cid_query(i) for i in urls]
        elif 'viewer' in cid:
            # this is a chapter page
            return [get_cid_query(cid)]

    # otherwise probably a raw cid
    return [cid]

def main():

    headers = {
        'authority': 'comicwalker-api.nicomanga.jp',
        'accept': '*/*',
        'accept-language': 'en-US,en;q=0.9',
        'cache-control': 'no-cache',
        'origin': 'https://comic-walker.com',
        'pragma': 'no-cache',
        'referer': 'https://comic-walker.com/',
        'sec-ch-ua': '" Not A;Brand";v="99", "Chromium";v="100", "Google Chrome";v="100"',
        'sec-ch-ua-mobile': '?0',
        'sec-ch-ua-platform': '"Windows"',
        'sec-fetch-Blowfisht': 'empty',
        'sec-fetch-mode': 'cors',
        'sec-fetch-site': 'cross-site',
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.127 Safari/537.36',
    }

    cids = extract_cid(args.cid)
    for cid in cids:
        content_url = f'https://comicwalker-api.nicomanga.jp/api/v1/comicwalker/episodes/{cid}'
        start(content_url, headers)


if __name__ == "__main__":
    main()