目录

我用一天时间“偷了”网易云音乐50W+用户信息

没有多进程,没有任何黑科技的裸爬虫。练手用,爬虫获取到的数据皆为非敏感且公开的用户信息。

一、思路

在 GitHub 上已经有网易云音乐的 node.js API(GitHub:https://github.com/Binaryify/NeteaseCloudMusicApi)。根据这个库提供的信息,可以很轻易地得到网易云音乐“获取某个用户粉丝信息”这一接口的参数(接口限制只能获取 100 个粉丝),进而继续获取这 100 个粉丝的粉丝……简单的几层循环嵌套,就能很轻易地拿到十万级到百万级的用户数据(非敏感用户信息)。

二、参数加密流程分析

__getFormData(data, __get_random_str()) 
    参数1:data是dict数据,包含了表单的各个字段和数据 
    参数2:16位的随机字符串 
    最终return的是一个dict,包含了params和encSecKey两个参数 
 
    params 
        __get_encText(args1, random16str) 
        参数1:args1是__getFormData函数的参数1,是dict数据,包含了表单的各个字段和数据 
        参数2:random16str是__getFormData函数的参数2,是一个16位的随机字符串 
        最终返回的是将参数加密后产生的params 
 
        __AES_encrypt(args1, args4) 
            参数1:args1是__get_encText函数的参数1,是dict数据,包含了表单的各个字段和数据 
            参数2:args4是一个固定参数 
            最终返回的是将参数使用AES CBC加密后再进行一次base64编码产生的字符串 
 
        首先使用__AES_encrypt函数加密一次,参数是(args1, args4),得到一个加密后的字符串 
        再使用这个加密过一次的字符串作为参数1,以__get_encText函数传入的参数2(即16位随机字符串random16str)作为参数2,继续加密一次 
        最终得到params 
    
    encSecKey 
        __get_encSecKey(random16str) 
            参数1:random16str是__getFormData函数的参数2,是一个16位的随机字符串
            最终返回的是通过随机字符串产生的encSecKey 
 
        固定参数arg2 
        固定参数arg3 
        通过固定算法,使用随机16位的字符串random16str与这两个固定参数产生encSecKey

三、源码

common.py (需要用到的函数)

import base64
from Crypto.Cipher import AES
import random
import codecs
import requests
from fake_useragent import UserAgent
 
def __AES_encrypt(text, key):
    '''
    Encrypt ``text`` with AES in CBC mode and return the result as base64 text.

    :param text: plaintext as ``str`` or bytes-like; a ``str`` is encoded as
        UTF-8 before padding. Any length is accepted, because the data is
        padded up to a multiple of the 16-byte block size here.
    :param key: encryption key as ``str``; it is UTF-8 encoded before use
    :return: base64-encoded ciphertext as ``str``
    '''
    iv = "0102030405060708"
    raw = text.encode("utf-8") if isinstance(text, str) else bytes(text)
    # Pad on the *encoded* length: counting characters instead of bytes would
    # leave non-ASCII input misaligned with the 16-byte block size.
    # Every pad byte holds the pad length; already-aligned input receives a
    # full extra block of padding.
    pad = 16 - len(raw) % 16
    raw += bytes([pad]) * pad
    aes = AES.new(key=bytes(key, encoding="utf-8"), mode=AES.MODE_CBC, IV=bytes(iv, encoding="utf-8"))
    res = aes.encrypt(raw)
    return base64.b64encode(res).decode("utf-8")
 
def __get_encText(args1, random16str):
    '''
    Build the ``params`` value by running the payload through two AES passes.

    :param args1: the form payload to encrypt
    :param random16str: key used for the second pass
    :return: the result of the second ``__AES_encrypt`` call
    '''
    fixed_key = "0CoJUm6Qyw8W8jud"
    # First pass uses the fixed key; its output is the input of the second
    # pass, which is keyed with the caller-supplied random string.
    first_pass = __AES_encrypt(args1, fixed_key)
    return __AES_encrypt(first_pass, random16str)
 
def __get_encSecKey(random16str):
    '''
    Derive ``encSecKey`` from the random string.

    The reversed string is read as a big-endian integer, raised to the fixed
    exponent ``arg2`` modulo the fixed modulus ``arg3``, and rendered as
    lowercase hex left-padded with zeros to 256 characters. According to the
    original author's note, the procedure was taken from the site's
    JavaScript code.

    :param random16str: the random string also used as the second AES key
    :return: hex string of at least 256 characters
    '''
    arg2 = "010001"
    arg3 = "00e0b509f6259df8642dbc35662901477df22677ec152b5ff68ace615bb7b725152b3ab17a876aea8a5aa76d2e417629ec4ee341f56135fccf695280104e0312ecbda92557c93870114af6c9d05c4f7f0c3685b7a46bee255932575cce10b424d813cfe4875d3e82047b97ddef52741d546b8e289dc6935b3ece0462db0a22b8e7"
    text = random16str[::-1]
    base = int.from_bytes(text.encode('utf-8'), 'big')
    # Three-argument pow reduces modulo arg3 at every step instead of first
    # materialising base ** 65537 in full.
    rs = pow(base, int(arg2, 16), int(arg3, 16))
    return format(rs, 'x').zfill(256)
 
def __get_random_str():
    '''Return a random 16-character string of ASCII letters and digits.'''
    alphabet = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"
    last = len(alphabet) - 1
    # One randint draw per output character, joined in draw order.
    return "".join(alphabet[random.randint(0, last)] for _ in range(16))
 
def __getFormData(args1, random16str):
    '''
    Assemble the form fields to submit.

    :param args1: the form payload, passed on to ``__get_encText``
    :param random16str: random string passed to both helper functions
    :return: dict with the keys ``params`` and ``encSecKey``
    '''
    params = __get_encText(args1, random16str)
    enc_sec_key = __get_encSecKey(random16str)
    return {"params": params, "encSecKey": enc_sec_key}
 
def __getFans(userID):
    '''
    Fetch the ``followeds`` list of one user and index it by ``userId``.

    Builds the encrypted form with ``__getFormData`` and POSTs it to the
    ``/weapi/user/getfolloweds`` endpoint.

    :param userID: id of the user to query; converted with ``str()``
    :return: dict mapping each entry's ``userId`` to the entry itself; empty
        when the response carries no ``followeds`` list
    :raises: exceptions raised by ``requests`` (connection errors, timeouts,
        a body that is not valid JSON) are not caught here
    '''
    userID = str(userID)  # e.g. "6177307"
    data = '{"userId":"' + userID + '","time":"-1","limit":"104334","csrf_token": ""}'
    formdata = __getFormData(data, __get_random_str())

    headers = {
        "content-type": "application/x-www-form-urlencoded",
        "user-agent": UserAgent().random,
        "referer": "https://music.163.com/",
    }
    # The context manager closes the session's connections when done.
    with requests.Session() as session:
        # Without a timeout a stalled connection would block forever.
        response = session.post(
            url='https://music.163.com/weapi/user/getfolloweds',
            headers=headers,
            data=formdata,
            timeout=30,
        )
        results = response.json()
    # 'followeds' may be absent or None; treat that as "no entries" instead
    # of failing while iterating None.
    followeds = results.get('followeds') or []
    return {one.get('userId'): one for one in followeds}

main.py (主程序)

# -*- coding: utf-8 -*-
"""Crawl fan data four levels deep from a seed user and append it to fans.txt."""
import common

AllData = {}
# userId -> result of common.__getFans, so a user reached through several
# paths is only requested from the server once.
_fans_cache = {}


def _fans_of(user_id):
    """Return the fans of ``user_id``, fetching each user at most once."""
    if user_id not in _fans_cache:
        _fans_cache[user_id] = common.__getFans(user_id)
    return _fans_cache[user_id]


count = 0  # progress counter (named ``count`` to avoid shadowing builtin ``sum``)
try:
    data = _fans_of(6177307)
    AllData.update(data)
    for item in data:
        temp = _fans_of(item)
        AllData.update(temp)
        for item1 in temp:
            temp2 = _fans_of(item1)
            AllData.update(temp2)
            for item2 in temp2:
                count += 1
                print(count)
                temp3 = _fans_of(item2)
                AllData.update(temp3)
finally:
    # Write whatever has been collected even if the crawl stops early because
    # of an exception or Ctrl-C; the exception still propagates afterwards.
    with open('fans.txt', 'a', encoding='utf-8') as f:
        for one in AllData.items():
            f.write(str(one) + '\n')
            print(str(one))

四、数据

我把 main.py 放到了服务器上运行,从开始到程序结束大概用了 24 小时。结束后看了一下存储的文本,有 50W 左右的用户数据(不含敏感信息),如下。

(83543823, {'py': 'mjdtjst', 'time': 1510758264852, 'userId': 83543823, 'mutual': False, 'followed': False, 'accountStatus': 0, 'nickname': '名劍動天倦收天', 'avatarUrl': 'http://p1.music.126.net/uocXBF145t-_V0pLWDwv0w==/3272146604393759.jpg', 'gender': 1, 'expertTags': None, 'experts': None, 'followeds': 13, 'remarkName': None, 'follows': 19, 'authStatus': 0, 'userType': 0, 'vipType': 0, 'signature': '天下若倾,尚有儒门一手擎天!', 'vipRights': None, 'eventCount': 1, 'playlistCount': 5})
(305937375, {'py': 'ttqsunny', 'time': 1510751111003, 'userId': 305937375, 'mutual': False, 'followed': False, 'accountStatus': 0, 'nickname': '甜甜圈sunny', 'avatarUrl': 'http://p1.music.126.net/v9iyq-6I1WC96R7SlbKvXQ==/3420580709664324.jpg', 'gender': 1, 'expertTags': None, 'experts': None, 'followeds': 4, 'remarkName': None, 'follows': 30, 'authStatus': 0, 'userType': 0, 'vipType': 11, 'signature': None, 'vipRights': {'associator': {'vipCode': 100, 'rights': True}, 'musicPackage': None, 'redVipAnnualCount': -1}, 'eventCount': 0, 'playlistCount': 3})
(359743222, {'py': 'xxwmhjl-_', 'time': 1509411979309, 'userId': 359743222, 'mutual': False, 'followed': False, 'accountStatus': 0, 'nickname': '醒醒我们回家了-_', 'avatarUrl': 'http://p1.music.126.net/KcAVTPDSC8MrKaFB9_Vd9g==/109951163985306640.jpg', 'gender': 1, 'expertTags': None, 'experts': None, 'followeds': 4, 'remarkName': None, 'follows': 9, 'authStatus': 0, 'userType': 0, 'vipType': 0, 'signature': '身邪不怕影子正', 'vipRights': None, 'eventCount': 0, 'playlistCount': 7})
(285736292, {'py': 'gzydyzj-', 'time': 1509016136464, 'userId': 285736292, 'mutual': False, 'followed': False, 'accountStatus': 0, 'nickname': '孤舟夜灯一掌剑-', 'avatarUrl': 'http://p1.music.126.net/SsOPGfkTUM0dSEkOuvHfHQ==/109951163351011922.jpg', 'gender': 1, 'expertTags': None, 'experts': None, 'followeds': 13, 'remarkName': None, 'follows': 33, 'authStatus': 0, 'userType': 0, 'vipType': 0, 'signature': '寂寞候忘了向先生说告辞!', 'vipRights': None, 'eventCount': 39, 'playlistCount': 23})
(79969440, {'py': 'cj', 'time': 1508992729596, 'userId': 79969440, 'mutual': False, 'followed': False, 'accountStatus': 0, 'nickname': '尺疾', 'avatarUrl': 'http://p1.music.126.net/SLdf4abndYLV4Gq8eXbK8w==/109951163788127476.jpg', 'gender': 1, 'expertTags': None, 'experts': None, 'followeds': 9, 'remarkName': None, 'follows': 17, 'authStatus': 0, 'userType': 0, 'vipType': 11, 'signature': '绝情莫过绝音讯,断爱无非断感情', 'vipRights': {'associator': {'vipCode': 100, 'rights': True}, 'musicPackage': None, 'redVipAnnualCount': -1}, 'eventCount': 0, 'playlistCount': 6})
(413801433, {'py': 'Aresfx', 'time': 1508811460203, 'userId': 413801433, 'mutual': False, 'followed': False, 'accountStatus': 0, 'nickname': 'Ares风雪', 'avatarUrl': 'http://p1.music.126.net/B1qPhuJvDGuAkQxGvNkxoA==/18828037115798229.jpg', 'gender': 1, 'expertTags': None, 'experts': None, 'followeds': 5, 'remarkName': None, 'follows': 32, 'authStatus': 0, 'userType': 0, 'vipType': 0, 'signature': None, 'vipRights': None, 'eventCount': 6, 'playlistCount': 11})
(385103142, {'py': 'gyw', 'time': 1508734902599, 'userId': 385103142, 'mutual': False, 'followed': False, 'accountStatus': 0, 'nickname': '归于无', 'avatarUrl': 'http://p1.music.126.net/HjaQMktAYh5dDuf009Fv5A==/18588343580852886.jpg', 'gender': 1, 'expertTags': None, 'experts': None, 'followeds': 1, 'remarkName': None, 'follows': 8, 'authStatus': 0, 'userType': 0, 'vipType': 0, 'signature': None, 'vipRights': None, 'eventCount': 0, 'playlistCount': 2})
(83142991, {'py': 'persana', 'time': 1508582619577, 'userId': 83142991, 'mutual': False, 'followed': False, 'accountStatus': 0, 'nickname': 'persana', 'avatarUrl': 'http://p1.music.126.net/psdQcsE9EWpawBXFDEbBag==/109951163289688334.jpg', 'gender': 1, 'expertTags': None, 'experts': None, 'followeds': 2, 'remarkName': None, 'follows': 11, 'authStatus': 0, 'userType': 0, 'vipType': 0, 'signature': '所有悲欢离合最后都不过付与说书人', 'vipRights': None, 'eventCount': 0, 'playlistCount': 4})
(259674694, {'py': 'hcxdn', 'time': 1508483033261, 'userId': 259674694, 'mutual': False, 'followed': False, 'accountStatus': 0, 'nickname': '会嘲笑的鸟', 'avatarUrl': 'http://p1.music.126.net/Z6hvgL__sAOKLe_X30ePUg==/1424967073916267.jpg', 'gender': 1, 'expertTags': None, 'experts': None, 'followeds': 1, 'remarkName': None, 'follows': 7, 'authStatus': 0, 'userType': 0, 'vipType': 0, 'signature': '', 'vipRights': None, 'eventCount': 0, 'playlistCount': 5})
(277358463, {'py': 'yqj', 'time': 1507095713513, 'userId': 277358463, 'mutual': False, 'followed': False, 'accountStatus': 0, 'nickname': '御清觉', 'avatarUrl': 'http://p1.music.126.net/VCxi7mae0G7lu1auRDiRQg==/1406275381794388.jpg', 'gender': 0, 'expertTags': None, 'experts': None, 'followeds': 1, 'remarkName': None, 'follows': 8, 'authStatus': 0, 'userType': 0, 'vipType': 0, 'signature': None, 'vipRights': None, 'eventCount': 7, 'playlistCount': 6})
(411934208, {'py': 'lybl10', 'time': 1498717923059, 'followed': False, 'accountStatus': 0, 'nickname': 'lybl10', 'follows': 50, 'followeds': 31, 'userId': 411934208, 'vipType': 0, 'remarkName': None, 'avatarUrl': 'http://p1.music.126.net/BMFrIsI8jj8R-ths5hR4QQ==/109951163450374697.jpg', 'authStatus': 0, 'userType': 0, 'mutual': False, 'gender': 1, 'expertTags': None, 'experts': None, 'signature': '旁友,你听说过布袋戏吗?( ´・ᴗ・` )', 'vipRights': None, 'eventCount': 18, 'playlistCount': 18})
(380749891, {'py': '18579086180', 'time': 1563184649570, 'followed': False, 'remarkName': None, 'userId': 380749891, 'nickname': '18579086180', 'mutual': False, 'follows': 3, 'followeds': 1, 'vipType': 0, 'accountStatus': 0, 'avatarUrl': 'http://p2.music.126.net/Eu7gRGq_brlja7CqeHaMsA==/18646617697102174.jpg', 'authStatus': 0, 'userType': 0, 'gender': 1, 'expertTags': None, 'experts': None, 'signature': None, 'vipRights': None, 'eventCount': 0, 'playlistCount': 2})
(367993248, {'py': 'fymxrys', 'time': 1561220920584, 'followed': False, 'remarkName': None, 'userId': 367993248, 'nickname': '风摇满袖人与事', 'mutual': False, 'follows': 4, 'followeds': 4, 'vipType': 0, 'accountStatus': 0, 'avatarUrl': 'http://p1.music.126.net/mU8t1BFAz-KKdOz_9vFbJQ==/18807146394528801.jpg', 'authStatus': 0, 'userType': 0, 'gender': 2, 'expertTags': None, 'experts': None, 'signature': '好霹雳,萌天下,无cp观,重口皆吃,大锅炖肉,讨厌角色性格走形╮( ̄▽ ̄)╭', 'vipRights': None, 'eventCount': 0, 'playlistCount': 8})
(307861774, {'py': 'yj', 'time': 1561120492944, 'followed': False, 'remarkName': None, 'userId': 307861774, 'nickname': '予玦', 'mutual': False, 'follows': 17, 'followeds': 5, 'vipType': 0, 'accountStatus': 0, 'avatarUrl': 'http://p1.music.126.net/d_GZI27ulOSrLoUtH7ohcw==/1398578804087193.jpg', 'authStatus': 0, 'userType': 0, 'gender': 2, 'expertTags': None, 'experts': None, 'signature': '曾绕林中潇湘索,鸿雁去,倚风听竹山河曲;无奈百尺叹高楼,此间不胜寒。', 'vipRights': None, 'eventCount': 0, 'playlistCount': 7})
(554030934, {'py': 'rqezcp', 'time': 1556848452508, 'followed': False, 'remarkName': None, 'userId': 554030934, 'nickname': '如期而至cp', 'mutual': False, 'follows': 31, 'followeds': 4, 'vipType': 0, 'accountStatus': 0, 'avatarUrl': 'http://p1.music.126.net/ut1M-6fLqv6q97Eh8WGXPw==/109951163993855389.jpg', 'authStatus': 0, 'userType': 0, 'gender': 2, 'expertTags': None, 'experts': None, 'signature': '', 'vipRights': None, 'eventCount': 0, 'playlistCount': 4})
(249943751, {'py': 'jjjjaaaay', 'time': 1542523045166, 'followed': False, 'remarkName': None, 'userId': 249943751, 'nickname': 'jjjjaaaay', 'mutual': False, 'follows': 7, 'followeds': 9, 'vipType': 0, 'accountStatus': 0, 'avatarUrl': 'http://p1.music.126.net/otp0fgk7yTala8lxZ6E5YQ==/109951163653627471.jpg', 'authStatus': 0, 'userType': 0, 'gender': 1, 'expertTags': None, 'experts': None, 'signature': '', 'vipRights': None, 'eventCount': 5, 'playlistCount': 3})
(69678664, {'py': 'syfsb', 'time': 1512553262435, 'followed': False, 'userId': 69678664, 'nickname': '疏影浮生灬', 'mutual': False, 'vipType': 0, 'followeds': 9, 'remarkName': None, 'accountStatus': 0, 'follows': 90, 'avatarUrl': 'http://p2.music.126.net/StbUovFF2Fe4OM8BXfn7Sw==/109951164172211056.jpg', 'authStatus': 0, 'userType': 0, 'gender': 1, 'expertTags': None, 'experts': None, 'signature': '星雪凄天银河垂,狂艳夜徊铸楚辞。萧瑟悲声秋风起,杀忆寒蝉未鸣时。', 'vipRights': None, 'eventCount': 8, 'playlistCount': 2})
(393632402, {'py': 'lyh', 'time': 1528925134497, 'followed': False, 'remarkName': None, 'userId': 393632402, 'nickname': '樂悦鹤', 'mutual': False, 'follows': 2, 'followeds': 7, 'vipType': 0, 'accountStatus': 0, 'avatarUrl': 'http://p1.music.126.net/w7yJivlRLfnXALj62LLPBw==/109951163959519476.jpg', 'authStatus': 0, 'userType': 0, 'gender': 2, 'expertTags': None, 'experts': None, 'signature': '“你要爱荒野上的风声 ,胜过爱贫穷和思考,暮冬时烤雪 ,迟夏写长信,早春不过,一颗树。”', 'vipRights': None, 'eventCount': 1, 'playlistCount': 13})
(363727100, {'py': 'qqz', 'time': 1521783497886, 'followed': False, 'remarkName': None, 'userId': 363727100, 'nickname': '乾秋子', 'mutual': False, 'follows': 35, 'followeds': 3, 'vipType': 0, 'accountStatus': 0, 'avatarUrl': 'http://p1.music.126.net/tLbQkYiAvnWUISXkgsqlAQ==/109951163731450756.jpg', 'authStatus': 0, 'userType': 0, 'gender': 1, 'expertTags': None, 'experts': None, 'signature': '', 'vipRights': None, 'eventCount': 3, 'playlistCount': 5})
(281619418, {'py': 'hrwsltx', 'time': 1515770911364, 'followed': False, 'vipType': 11, 'nickname': '皇儒无上蔺天邢', 'mutual': False, 'accountStatus': 0, 'avatarUrl': 'http://p1.music.126.net/UEyFyd1lC50-TP8OFdYujw==/109951164003473228.jpg', 'authStatus': 0, 'userType': 0, 'gender': 1, 'expertTags': None, 'experts': None, 'remarkName': None, 'userId': 281619418, 'follows': 41, 'followeds': 19, 'signature': '师太,放我一马!!!!!', 'vipRights': {'associator': {'vipCode': 100, 'rights': True}, 'musicPackage': None, 'redVipAnnualCount': 1}, 'eventCount': 2, 'playlistCount': 11})
(102754093, {'py': 'sts', 'time': 1510381589001, 'followed': False, 'remarkName': None, 'userId': 102754093, 'nickname': '説太岁', 'mutual': False, 'follows': 7, 'followeds': 6, 'vipType': 0, 'accountStatus': 0, 'avatarUrl': 'http://p1.music.126.net/vZ3I_7VzVxF3iqTrXIh6eQ==/3272146607712737.jpg', 'authStatus': 0, 'userType': 0, 'gender': 1, 'expertTags': None, 'experts': None, 'signature': '愤怒是对自己无能的表现', 'vipRights': None, 'eventCount': 0, 'playlistCount': 6})
(255086049, {'py': 'rwyjths', 'time': 1546144733907, 'avatarUrl': 'http://p1.music.126.net/s0lEIISQLCiCvcjfuI5wUw==/109951163864346086.jpg', 'authStatus': 0, 'userType': 0, 'gender': 2, 'expertTags': None, 'experts': None, 'follows': 26, 'followeds': 9, 'remarkName': None, 'mutual': False, 'accountStatus': 0, 'followed': False, 'userId': 255086049, 'nickname': '人舞一剑探花殇', 'vipType': 10, 'signature': '', 'vipRights': {'associator': None, 'musicPackage': {'vipCode': 220, 'rights': True}, 'redVipAnnualCount': -1}, 'eventCount': 0, 'playlistCount': 6})
(125556054, {'py': '95-58165', 'time': 1506253400100, 'followed': False, 'remarkName': None, 'userId': 125556054, 'nickname': '95-58165', 'mutual': False, 'follows': 14, 'followeds': 6, 'vipType': 0, 'accountStatus': 0, 'avatarUrl': 'http://p1.music.126.net/odk0UIRLjxtl9BFZyU_7qw==/109951164198663765.jpg', 'authStatus': 0, 'userType': 0, 'gender': 1, 'expertTags': None, 'experts': None, 'signature': '知进退,明得失', 'vipRights': None, 'eventCount': 4, 'playlistCount': 11})
(79927936, {'py': '_nfbj_', 'time': 1499765008933, 'followed': False, 'remarkName': None, 'userId': 79927936, 'nickname': '_南风不竞_', 'mutual': False, 'follows': 16, 'followeds': 12, 'vipType': 11, 'accountStatus': 0, 'avatarUrl': 'http://p1.music.126.net/_HXCTTkMUS4uCbDv8_ECuQ==/109951163041830201.jpg', 'authStatus': 0, 'userType': 0, 'gender': 1, 'expertTags': None, 'experts': None, 'signature': '驰来北马多骄气,歌到南风尽死声。', 'vipRights': {'associator': {'vipCode': 100, 'rights': True}, 'musicPackage': None, 'redVipAnnualCount': -1}, 'eventCount': 4, 'playlistCount': 14})

下面是把其中一条记录格式化后的结果(这是 Python 字典的打印形式,并非严格的 JSON),信息的维度从 UID 到用户注册时间,还是比较丰富的。(非敏感用户信息)

{
    'py': 'mjdtjst',
    'time': 1510758264852,
    'userId': 83543823,
    'mutual': False,
    'followed': False,
    'accountStatus': 0,
    'nickname': '名劍動天倦收天',
    'avatarUrl': 'http://p1.music.126.net/uocXBF145t-_V0pLWDwv0w==/3272146604393759.jpg',
    'gender': 1,
    'expertTags': None,
    'experts': None,
    'followeds': 13,
    'remarkName': None,
    'follows': 19,
    'authStatus': 0,
    'userType': 0,
    'vipType': 0,
    'signature': '天下若倾,尚有儒门一手擎天!',
    'vipRights': None,
    'eventCount': 1,
    'playlistCount': 5
}

人对技术要持有敬畏之心,慎用之。