# dota.py
# -*- coding: UTF-8 -*-
import requests
from lxml import etree
import random,time
import xlrd
import xlwt
# Hero identifiers (English) in the canonical order used throughout this
# script: they are substituted into the scraped URLs and define the row and
# column order of the exported tables.
hero_list = [
    'huskar', 'shredder', 'axe', 'slardar', 'sand_king', 'undying',
    'omniknight', 'spirit_breaker', 'legion_commander', 'tiny', 'tidehunter',
    'life_stealer', 'doom_bringer', 'bristleback', 'skeleton_king',
    'rattletrap', 'alchemist', 'magnataur', 'abaddon', 'kunkka', 'wisp',
    'dragon_knight', 'phoenix', 'beastmaster', 'tusk', 'elder_titan', 'lycan',
    'night_stalker', 'earthshaker', 'treant', 'abyssal_underlord',
    'earth_spirit', 'chaos_knight', 'pudge', 'centaur', 'brewmaster', 'sven',
    'nyx_assassin', 'viper', 'riki', 'antimage', 'morphling', 'vengefulspirit',
    'phantom_assassin', 'luna', 'clinkz', 'gyrocopter', 'monkey_king',
    'ember_spirit', 'drow_ranger', 'lone_druid', 'bounty_hunter',
    'terrorblade', 'naga_siren', 'mirana', 'sniper', 'phantom_lancer',
    'arc_warden', 'venomancer', 'nevermore', 'troll_warlord', 'slark',
    'bloodseeker', 'spectre', 'faceless_void', 'medusa', 'templar_assassin',
    'pangolier', 'meepo', 'weaver', 'ursa', 'juggernaut', 'broodmother',
    'razor', 'shadow_shaman', 'silencer', 'disruptor', 'ancient_apparition',
    'invoker', 'obsidian_destroyer', 'lina', 'warlock', 'dark_seer',
    'crystal_maiden', 'zuus', 'lion', 'lich', 'jakiro', 'rubick', 'techies',
    'chen', 'ogre_magi', 'enigma', 'leshrac', 'witch_doctor', 'shadow_demon',
    'keeper_of_the_light', 'bane', 'batrider', 'queenofpain', 'skywrath_mage',
    'tinker', 'dazzle', 'enchantress', 'necrolyte', 'winter_wyvern', 'visage',
    'death_prophet', 'storm_spirit', 'dark_willow', 'pugna', 'furion',
    'windrunner', 'oracle', 'puck',
]

# Chinese hero names, index-aligned with hero_list (entry k names hero_list[k]).
_hero_names_cn = [
    '哈斯卡', '伐木机', '斧王', '斯拉达', '沙王', '不朽尸王',
    '全能骑士', '裂魂人', '军团指挥官', '小小', '潮汐猎人',
    '噬魂鬼', '末日使者', '钢背兽', '冥魂大帝',
    '发条技师', '炼金术士', '马格纳斯', '亚巴顿', '昆卡', '艾欧',
    '龙骑士', '凤凰', '兽王', '巨牙海民', '上古巨神', '狼人',
    '暗夜魔王', '撼地者', '树精卫士', '孽主',
    '大地之灵', '混沌骑士', '帕吉', '半人马战行者', '酒仙', '斯温',
    '司夜刺客', '冥界亚龙', '力丸', '敌法师', '变体精灵', '复仇之魂',
    '幻影刺客', '露娜', '克林克兹', '矮人直升机', '齐天大圣',
    '灰烬之灵', '卓尔游侠', '德鲁伊', '赏金猎人',
    '恐怖利刃', '娜迦海妖', '米拉娜', '狙击手', '幻影长矛手',
    '天穹守望者', '剧毒术士', '影魔', '巨魔战将', '斯拉克',
    '嗜血狂魔', '幽鬼', '虚空假面', '美杜莎', '圣堂刺客',
    '石鳞剑士', '米波', '编织者', '熊战士', '主宰', '育母蜘蛛',
    '剃刀', '暗影萨满', '沉默术士', '干扰者', '远古冰魄',
    '祈求者', '殁境神蚀者', '莉娜', '术士', '黑暗贤者',
    '水晶室女', '宙斯', '莱恩', '巫妖', '杰奇洛', '拉比克', '工程师',
    '陈', '食人魔魔法师', '谜团', '拉席克', '巫医', '暗影恶魔',
    '光之守卫', '祸乱之源', '蝙蝠骑士', '痛苦女王', '天怒法师',
    '修补匠', '戴泽', '魅惑魔女', '瘟疫法师', '寒冬飞龙', '维萨吉',
    '死亡先知', '风暴之灵', '邪影芳灵', '帕格纳', '先知',
    '风行者', '神谕者', '帕克',
]

# zip() silently truncates to the shorter input, so fail loudly if the two
# name lists ever get out of step.
if len(_hero_names_cn) != len(hero_list):
    raise ValueError('Chinese and English hero name lists differ in length')

# Hero name: Chinese -> English
heroes_dict = dict(zip(_hero_names_cn, hero_list))
# Hero name: English -> Chinese
heroes_dict_en = dict(zip(hero_list, _hero_names_cn))
# Browser request headers: the User-Agent identifies the client as Chrome on
# an Android phone.
head = {
    'User-Agent': 'Mozilla/5.0 (Linux; Android 4.0.4; Galaxy Nexus Build/IMM76B) AppleWebKit/535.19 (KHTML, like Gecko) Chrome/18.0.1025.133 Mobile Safari/535.19',
}

# Accumulators for the opponent ("anti") pages: one list of names and one
# list of win rates per scraped hero; `i` counts the pages fetched so far.
name_anti = []
rate_anti = []
i = 0

# Open a workbook holding the "n" bracket data. It is used as a fallback:
# whenever a value is missing from the current scrape, the corresponding
# "n" bracket value is used instead.
# Per the original author's note, win rates / counter and synergy figures
# that are missing from the "n" data as well are recorded as 0.0%.
# NOTE(review): recent xlrd releases no longer read .xlsx files - confirm the
# installed xlrd version can open this workbook.
data = xlrd.open_workbook('Dota_n.xlsx')
_fallback_sheets = data.sheets()
anti_data = _fallback_sheets[0]  # first sheet: fallback for the opponent table
comb_data = _fallback_sheets[1]  # second sheet: fallback for the teammate table
# Scrape, for every hero, the win rates listed on its opponent page.
for i, hero in enumerate(hero_list, start=1):
    url0 = 'http://www.dotamax.com/hero/detail/match_up_anti/T/?server=cn&ladder=y&skill=h&time=month'
    ##########################################
    # This run scrapes the last month of ranked ("ladder") matches on the
    # cn server in the "h" skill bracket.
    # Other data sets can be scraped by changing the url0 value in BOTH
    # scraping for-loops of this script.
    # Meaning of the url0 parameters:
    #   server = cn (China) | world (rest of the world) | all
    #   ladder = y (ranked) | n (normal)
    #   skill  = n | h | vh
    #   time   = month | week | v707 (data since patch 7.07)
    #   path segment: match_up_comb (teammates) | match_up_anti (opponents)
    #   T -- placeholder for the hero name, substituted on the next line
    ##########################################
    # Only the first capital 'T' is replaced, so the placeholder must be the
    # first capital T appearing in url0.
    url = url0.replace('T', hero, 1)
    # Without a timeout a stalled connection would block the whole run
    # forever; with one, requests raises a Timeout exception instead.
    r = requests.get(url, headers=head, timeout=30).text
    s = etree.HTML(r)
    # First table column: hero names as shown on the page (Chinese).
    names = s.xpath('/html/body/div[2]/div[3]/div[1]/div[2]/table/tbody/tr/td[1]/span/text()')
    # Second table column: the matching win-rate strings.
    rates = s.xpath('/html/body/div[2]/div[3]/div[1]/div[2]/table/tbody/tr/td[2]/div[1]/text()')
    # Add an entry for the hero itself with a fixed '0.00%' so that every row
    # can later be aligned against the complete hero_list.
    names.append(heroes_dict_en[hero])
    rates.append('0.00%')
    name_anti.append(names)
    rate_anti.append(rates)
    print(i, '/' + str(len(hero_list)), hero)
    # Short random pause between requests.
    time.sleep(random.uniform(0.1, 0.5))
# Normalise the scraped opponent data: translate the hero names to English
# and reorder every per-hero list so that index k belongs to hero_list[k].
for list_n in range(len(hero_list)):
    names = name_anti[list_n]
    rates = rate_anti[list_n]
    # Translate the hero names to English (in place, same list object).
    names[:] = [heroes_dict[heroname_cn] for heroname_cn in names]
    # Put names and win rates into the canonical hero_list order.
    for hero_n, wanted in enumerate(hero_list):
        try:
            # Positions before hero_n are already in their final place.
            # Searching with an explicit start also copes with hero_n being
            # equal to len(names), which previously raised IndexError when
            # the trailing heroes were missing from a page.
            i = names.index(wanted, hero_n)
        except ValueError:
            # Hero missing from this page: log it and fill the gap from the
            # fallback sheet.
            # NOTE(review): the fallback is read at (row=hero_n+1,
            # col=list_n+1); confirm Dota_n.xlsx stores the opponent table in
            # that orientation.
            print(list_n, hero_n, hero_list[list_n], wanted)
            names.insert(hero_n, wanted)
            rates.insert(hero_n, anti_data.cell(hero_n + 1, list_n + 1).value)
        else:
            # Swap the found entry (name and rate together) into position.
            names[i], names[hero_n] = names[hero_n], names[i]
            rates[i], rates[hero_n] = rates[hero_n], rates[i]
    print(list_n + 1, '/' + str(len(hero_list)), hero_list[list_n], len(names))
# Scrape, for every hero, the win rates listed on its teammate page.
name_comb = []
rate_comb = []
for i, hero in enumerate(hero_list, start=1):
    url0 = 'http://www.dotamax.com/hero/detail/match_up_comb/T/?server=cn&ladder=y&skill=h&time=month'
    ##########################################
    # This run scrapes the last month of ranked ("ladder") matches on the
    # cn server in the "h" skill bracket.
    # Other data sets can be scraped by changing the url0 value in BOTH
    # scraping for-loops of this script.
    ##########################################
    # Only the first capital 'T' is replaced, so the placeholder must be the
    # first capital T appearing in url0.
    url = url0.replace('T', hero, 1)
    # Without a timeout a stalled connection would block the whole run
    # forever; with one, requests raises a Timeout exception instead.
    r = requests.get(url, headers=head, timeout=30).text
    s = etree.HTML(r)
    # First table column: hero names as shown on the page (Chinese).
    names = s.xpath('/html/body/div[2]/div[3]/div[1]/div[2]/table/tbody/tr/td[1]/span/text()')
    # Second table column: the matching win-rate strings.
    rates = s.xpath('/html/body/div[2]/div[3]/div[1]/div[2]/table/tbody/tr/td[2]/div[1]/text()')
    # Add an entry for the hero itself with a fixed '0.00%' so that every row
    # can later be aligned against the complete hero_list.
    names.append(heroes_dict_en[hero])
    rates.append('0.00%')
    name_comb.append(names)
    rate_comb.append(rates)
    print(i, '/' + str(len(hero_list)), hero)
    # Short random pause between requests.
    time.sleep(random.uniform(0.1, 0.5))
# Normalise the scraped teammate data: translate the hero names to English
# and reorder every per-hero list so that index k belongs to hero_list[k].
for list_n in range(len(hero_list)):
    names = name_comb[list_n]
    rates = rate_comb[list_n]
    # Translate the hero names to English (in place, same list object).
    names[:] = [heroes_dict[heroname_cn] for heroname_cn in names]
    # Put names and win rates into the canonical hero_list order.
    for hero_n, wanted in enumerate(hero_list):
        try:
            # Positions before hero_n are already in their final place.
            # Searching with an explicit start also copes with hero_n being
            # equal to len(names), which previously raised IndexError when
            # the trailing heroes were missing from a page.
            i = names.index(wanted, hero_n)
        except ValueError:
            # Hero missing from this page: log it and fill the gap from the
            # fallback sheet, read at (row=hero_n+1, col=list_n+1).
            print(list_n, hero_n, hero_list[list_n], wanted)
            names.insert(hero_n, wanted)
            rates.insert(hero_n, comb_data.cell(hero_n + 1, list_n + 1).value)
        else:
            # Swap the found entry (name and rate together) into position.
            names[i], names[hero_n] = names[hero_n], names[i]
            rates[i], rates[hero_n] = rates[hero_n], rates[i]
    print(list_n + 1, '/' + str(len(hero_list)), hero_list[list_n], len(names))
# Write both tables to an Excel workbook: sheet 'anti_h' for the opponent
# data and sheet 'comb_h' for the teammate data.
workbook = xlwt.Workbook(encoding='ascii')
worksheet1, worksheet2 = (
    workbook.add_sheet(title) for title in ('anti_h', 'comb_h')
)

# Sheet positions 1..115 hold the data; row 0 and column 0 hold the labels.
positions = range(1, 116)


def _write_hero_labels(sheet):
    """Write the hero names down column 0 and then across row 0 of *sheet*."""
    for pos in positions:
        sheet.write(pos, 0, hero_list[pos - 1])
    for pos in positions:
        sheet.write(0, pos, hero_list[pos - 1])


# Opponent sheet: row r holds the list scraped for hero r (rate_anti[r-1]),
# one column per opposing hero.
_write_hero_labels(worksheet1)
for sheet_row in positions:
    anti_rates = rate_anti[sheet_row - 1]
    for sheet_col in positions:
        worksheet1.write(sheet_row, sheet_col, anti_rates[sheet_col - 1])

# Teammate sheet: filled column by column, so column c holds the list scraped
# for hero c (rate_comb[c-1]), one row per teammate.
_write_hero_labels(worksheet2)
for sheet_col in positions:
    comb_rates = rate_comb[sheet_col - 1]
    for sheet_row in positions:
        worksheet2.write(sheet_row, sheet_col, comb_rates[sheet_row - 1])

workbook.save('Dota_BP_h.xls')