From 12d999c72962dc0423ccf42262b442cd05b71bec Mon Sep 17 00:00:00 2001 From: iamtomcheng Date: Tue, 9 Jan 2018 23:25:29 +0100 Subject: [PATCH 1/8] Update client.py Add '-t' and '--date' as the argument for Modes.Replay.Modes.User --- tools/hlt_client/hlt_client/client.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tools/hlt_client/hlt_client/client.py b/tools/hlt_client/hlt_client/client.py index b48735b6..dfee892d 100755 --- a/tools/hlt_client/hlt_client/client.py +++ b/tools/hlt_client/hlt_client/client.py @@ -184,6 +184,9 @@ def _parse_arguments(): help='Number of replays to fetch') replay_user_parser.add_argument('-d', '--destination', dest='destination', action='store', type=str, required=True, help="In which folder to store all resulting replay files.") + replay_regex_parser.add_argument('-t', '--date', action='store', type=str, dest='date', required=True, + help="Fetch replay files matching the specified date. To fetch a day's files user" + "the YYYYMMDD format.") # .Modes.Replay.Modes.Date replay_regex_parser = replay_subparser.add_parser(REPLAY_MODE_DATE, help='Retrieve replays based on regex') replay_regex_parser.add_argument('-t', '--date', action='store', type=str, dest='date', required=True, From b595dc9db27d1d741b1a818f9f1f930f6ac1dbd9 Mon Sep 17 00:00:00 2001 From: iamtomcheng Date: Tue, 9 Jan 2018 23:26:29 +0100 Subject: [PATCH 2/8] Update client.py Correct a mistake --- tools/hlt_client/hlt_client/client.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/hlt_client/hlt_client/client.py b/tools/hlt_client/hlt_client/client.py index dfee892d..0f92949a 100755 --- a/tools/hlt_client/hlt_client/client.py +++ b/tools/hlt_client/hlt_client/client.py @@ -184,7 +184,7 @@ def _parse_arguments(): help='Number of replays to fetch') replay_user_parser.add_argument('-d', '--destination', dest='destination', action='store', type=str, required=True, help="In which folder to store all resulting replay files.") - replay_regex_parser.add_argument('-t', '--date', action='store', type=str, dest='date', required=True, + replay_user_parser.add_argument('-t', '--date', action='store', type=str, dest='date', required=True, help="Fetch replay files matching the specified date. To fetch a day's files user" "the YYYYMMDD format.") # .Modes.Replay.Modes.Date From 89525d4e36d3cb310f80660fc9f1ae2851ede74a Mon Sep 17 00:00:00 2001 From: iamtomcheng Date: Tue, 9 Jan 2018 23:45:59 +0100 Subject: [PATCH 3/8] Update download_game.py Add date as the argument for UserGameDownloader and skip replays that belongs to the wrong date in the method _fetch_metadata in case date is given as the input. --- tools/hlt_client/hlt_client/download_game.py | 27 +++++++++++++++----- 1 file changed, 20 insertions(+), 7 deletions(-) diff --git a/tools/hlt_client/hlt_client/download_game.py b/tools/hlt_client/hlt_client/download_game.py index 6166ae60..70b02814 100644 --- a/tools/hlt_client/hlt_client/download_game.py +++ b/tools/hlt_client/hlt_client/download_game.py @@ -128,30 +128,41 @@ class UserGameDownloader(GameDownloader): _FETCH_THRESHOLD = 250 _BUCKETS = [] - def __init__(self, destination, user_id, limit): + def __init__(self, destination, user_id, limit, date): """ Download games for a user :param destination: Where to download :param user_id: Which user's replays to fetch :param limit: How many replays to fetch (max) + :param date: Which date to download """ self.destination = destination - self.objects = self._parse_user_metadata(self._fetch_metadata(user_id, limit)) + self.objects = self._parse_user_metadata(self._fetch_metadata(user_id, limit, date)) - def _fetch_metadata(self, user_id, limit): + def _fetch_metadata(self, user_id, limit, date): """ Retrieves paginated game metadata from the halite servers for a specified user up to limit items :param user_id: The id of the user to fetch :param limit: The maximum number of items to fetch + :param date: Which date to download :return: The full metadata of items """ print('Fetching Metadata') current = 0 result_set = [] while current <= limit: - current_limit = self._FETCH_THRESHOLD if ((limit - current) >= self._FETCH_THRESHOLD) else (limit - current) - result_set += requests.get(self._USER_BOT_URI.format(user_id, current_limit, current)).json() - current += self._FETCH_THRESHOLD + if date is None: + current_limit = self._FETCH_THRESHOLD if ((limit - current) >= self._FETCH_THRESHOLD) else (limit - current) + result_set += requests.get(self._USER_BOT_URI.format(user_id, current_limit, current)).json() + current += self._FETCH_THRESHOLD + else: + if requests.get(self._USER_BOT_URI.format(user_id, 1, current)).json()[0]["replay"].split("-")[1] != date: + current += 1 + limit += 1 + continue + else: + result_set += requests.get(self._USER_BOT_URI.format(user_id, 1, current)).json() + current += 1 print('Finished metadata fetch. Found {} game files.'.format(len(result_set))) return result_set @@ -197,5 +208,7 @@ def download(mode, destination, date, all_bots, default_user_id, user_id, limit) elif mode == client.REPLAY_MODE_USER: if not (default_user_id or user_id): raise ValueError("Cannot run default mode without authenticating .Please run `client.py --auth` first.") - UserGameDownloader(destination, default_user_id if not user_id else user_id, limit).get_objects() + if date != None and not _valid_date(date): + raise ValueError("Date must match format YYYYMMDD") + UserGameDownloader(destination, default_user_id if not user_id else user_id, limit, date).get_objects() print('Finished writing files to desired location') From 5d30cce10f96ed314c8faff7025c66fd8ce51526 Mon Sep 17 00:00:00 2001 From: iamtomcheng Date: Wed, 10 Jan 2018 14:26:48 +0100 Subject: [PATCH 4/8] Update client.py --- tools/hlt_client/hlt_client/client.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/hlt_client/hlt_client/client.py b/tools/hlt_client/hlt_client/client.py index 0f92949a..74200ee9 100755 --- a/tools/hlt_client/hlt_client/client.py +++ b/tools/hlt_client/hlt_client/client.py @@ -184,7 +184,7 @@ def _parse_arguments(): help='Number of replays to fetch') replay_user_parser.add_argument('-d', '--destination', dest='destination', action='store', type=str, required=True, help="In which folder to store all resulting replay files.") - replay_user_parser.add_argument('-t', '--date', action='store', type=str, dest='date', required=True, + replay_user_parser.add_argument('-t', '--date', action='store', type=str, dest='date', default=None, help="Fetch replay files matching the specified date. To fetch a day's files user" "the YYYYMMDD format.") # .Modes.Replay.Modes.Date From 2c478ab3cb32448f68178ac4d91e55f97a0d63b0 Mon Sep 17 00:00:00 2001 From: iamtomcheng Date: Thu, 11 Jan 2018 00:15:01 +0100 Subject: [PATCH 5/8] Update download_game.py Filter replays from the server side when the date is given in the method _fetch_metadata of the class UserGameDownloader. --- tools/hlt_client/hlt_client/download_game.py | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/tools/hlt_client/hlt_client/download_game.py b/tools/hlt_client/hlt_client/download_game.py index 70b02814..dc0bf2e3 100644 --- a/tools/hlt_client/hlt_client/download_game.py +++ b/tools/hlt_client/hlt_client/download_game.py @@ -1,6 +1,7 @@ import os import zstd import re +import datetime import requests import multiprocessing @@ -125,6 +126,7 @@ def __init__(self, destination, date, all_bots=False): class UserGameDownloader(GameDownloader): _USER_BOT_URI = 'https://api.halite.io/v1/api/user/{}/match?limit={}&offset={}' + _DATE_BOT_URI = 'https://api.halite.io/v1/api/user/{}/match?limit={}&filter=time_played,>=,{}&filter=time_played,<,{}' _FETCH_THRESHOLD = 250 _BUCKETS = [] @@ -150,19 +152,15 @@ def _fetch_metadata(self, user_id, limit, date): print('Fetching Metadata') current = 0 result_set = [] - while current <= limit: - if date is None: + if date is None: + while current <= limit: current_limit = self._FETCH_THRESHOLD if ((limit - current) >= self._FETCH_THRESHOLD) else (limit - current) result_set += requests.get(self._USER_BOT_URI.format(user_id, current_limit, current)).json() current += self._FETCH_THRESHOLD - else: - if requests.get(self._USER_BOT_URI.format(user_id, 1, current)).json()[0]["replay"].split("-")[1] != date: - current += 1 - limit += 1 - continue - else: - result_set += requests.get(self._USER_BOT_URI.format(user_id, 1, current)).json() - current += 1 + else: + current_date = datetime.datetime.strptime(date,'%Y%m%d').strftime('%Y-%m-%dT00:00') + next_date = (datetime.datetime.strptime(date,'%Y%m%d')+datetime.timedelta(days=1)).strftime('%Y-%m-%dT00:00') + result_set += requests.get(self._DATE_BOT_URI.format(user_id, limit, current_date, next_date)).json() print('Finished metadata fetch. Found {} game files.'.format(len(result_set))) return result_set From 72042a0bfbec727bdd72af6cfb54d4bc072c1c67 Mon Sep 17 00:00:00 2001 From: iamtomcheng Date: Fri, 12 Jan 2018 15:29:42 +0100 Subject: [PATCH 6/8] Update download_game.py Retrieve replays starting on the requested date on server side and filter out the retrieved replays that were played on the next day and onwards in method _fetch_metadata of class UserGameDownloader. --- tools/hlt_client/hlt_client/download_game.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/tools/hlt_client/hlt_client/download_game.py b/tools/hlt_client/hlt_client/download_game.py index dc0bf2e3..7af46721 100644 --- a/tools/hlt_client/hlt_client/download_game.py +++ b/tools/hlt_client/hlt_client/download_game.py @@ -126,7 +126,7 @@ def __init__(self, destination, date, all_bots=False): class UserGameDownloader(GameDownloader): _USER_BOT_URI = 'https://api.halite.io/v1/api/user/{}/match?limit={}&offset={}' - _DATE_BOT_URI = 'https://api.halite.io/v1/api/user/{}/match?limit={}&filter=time_played,>=,{}&filter=time_played,<,{}' + _DATE_BOT_URI = 'https://api.halite.io/v1/api/user/{}/match?limit={}&offset={}&filter=time_played,>=,{}' _FETCH_THRESHOLD = 250 _BUCKETS = [] @@ -152,15 +152,15 @@ def _fetch_metadata(self, user_id, limit, date): print('Fetching Metadata') current = 0 result_set = [] - if date is None: - while current <= limit: - current_limit = self._FETCH_THRESHOLD if ((limit - current) >= self._FETCH_THRESHOLD) else (limit - current) + while current <= limit: + current_limit = self._FETCH_THRESHOLD if ((limit - current) >= self._FETCH_THRESHOLD) else (limit - current) + if date is None: result_set += requests.get(self._USER_BOT_URI.format(user_id, current_limit, current)).json() - current += self._FETCH_THRESHOLD - else: - current_date = datetime.datetime.strptime(date,'%Y%m%d').strftime('%Y-%m-%dT00:00') - next_date = (datetime.datetime.strptime(date,'%Y%m%d')+datetime.timedelta(days=1)).strftime('%Y-%m-%dT00:00') - result_set += requests.get(self._DATE_BOT_URI.format(user_id, limit, current_date, next_date)).json() + else: + requested_date = datetime.datetime.strptime(date,'%Y%m%d').strftime('%Y-%m-%dT00:00') + result_set += [ replay for replay in requests.get(self._DATE_BOT_URI.format(user_id, current_limit, current, current_date)).json() \ + if datetime.datetime.strptime(replay["time_played"],'%a, %d %b %Y %H:%M:%S GMT').strftime('%Y-%m-%dT00:00') == requested_date ] + current += self._FETCH_THRESHOLD print('Finished metadata fetch. Found {} game files.'.format(len(result_set))) return result_set From 95490ad14f988f7b2efb1f70841553b9f8ad9b43 Mon Sep 17 00:00:00 2001 From: iamtomcheng Date: Fri, 12 Jan 2018 15:40:20 +0100 Subject: [PATCH 7/8] Update download_game.py Correct a typo --- tools/hlt_client/hlt_client/download_game.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/hlt_client/hlt_client/download_game.py b/tools/hlt_client/hlt_client/download_game.py index 7af46721..75cb03f4 100644 --- a/tools/hlt_client/hlt_client/download_game.py +++ b/tools/hlt_client/hlt_client/download_game.py @@ -158,7 +158,7 @@ def _fetch_metadata(self, user_id, limit, date): result_set += requests.get(self._USER_BOT_URI.format(user_id, current_limit, current)).json() else: requested_date = datetime.datetime.strptime(date,'%Y%m%d').strftime('%Y-%m-%dT00:00') - result_set += [ replay for replay in requests.get(self._DATE_BOT_URI.format(user_id, current_limit, current, current_date)).json() \ + result_set += [ replay for replay in requests.get(self._DATE_BOT_URI.format(user_id, current_limit, current, requested_date)).json() \ if datetime.datetime.strptime(replay["time_played"],'%a, %d %b %Y %H:%M:%S GMT').strftime('%Y-%m-%dT00:00') == requested_date ] current += self._FETCH_THRESHOLD print('Finished metadata fetch. Found {} game files.'.format(len(result_set))) From ff2f0dc183285556dd6a4890d00d6d4a0da73c4a Mon Sep 17 00:00:00 2001 From: iamtomcheng Date: Fri, 12 Jan 2018 16:36:17 +0100 Subject: [PATCH 8/8] Update download_game.py Dropped T00:00 in format of requested date as we are filtering the whole day. --- tools/hlt_client/hlt_client/download_game.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/hlt_client/hlt_client/download_game.py b/tools/hlt_client/hlt_client/download_game.py index 75cb03f4..1d2884ba 100644 --- a/tools/hlt_client/hlt_client/download_game.py +++ b/tools/hlt_client/hlt_client/download_game.py @@ -157,9 +157,9 @@ def _fetch_metadata(self, user_id, limit, date): if date is None: result_set += requests.get(self._USER_BOT_URI.format(user_id, current_limit, current)).json() else: - requested_date = datetime.datetime.strptime(date,'%Y%m%d').strftime('%Y-%m-%dT00:00') + requested_date = datetime.datetime.strptime(date,'%Y%m%d').strftime('%Y-%m-%d') result_set += [ replay for replay in requests.get(self._DATE_BOT_URI.format(user_id, current_limit, current, requested_date)).json() \ - if datetime.datetime.strptime(replay["time_played"],'%a, %d %b %Y %H:%M:%S GMT').strftime('%Y-%m-%dT00:00') == requested_date ] + if datetime.datetime.strptime(replay["time_played"],'%a, %d %b %Y %H:%M:%S GMT').strftime('%Y-%m-%d') == requested_date ] current += self._FETCH_THRESHOLD print('Finished metadata fetch. Found {} game files.'.format(len(result_set))) return result_set