-
Notifications
You must be signed in to change notification settings - Fork 0
/
youtube_data_store.py
80 lines (60 loc) · 2.35 KB
/
youtube_data_store.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
import os
import logging
from typing import Iterator, List
from googleapiclient.discovery import build
from pymongo import MongoClient, ReplaceOne, DESCENDING
from pymongo.collection import Collection
# API key handed to the YouTube client; read from the environment at import time,
# so a missing YOUTUBE_API_KEY variable raises KeyError as soon as the module loads.
YOUTUBE_API_KEY = os.environ["YOUTUBE_API_KEY"]
# Raise this logger's threshold to WARNING so its lower-severity messages are dropped.
# NOTE(review): the client is imported from `googleapiclient`; confirm that
# "apiclient.discovery_cache" is the logger name the library actually uses.
logging.getLogger("apiclient.discovery_cache").setLevel(logging.WARNING)
def search_videos(param: str, max_pages: int = 5) -> Iterator[List[dict]]:
    """
    Search YouTube for videos and yield their details one search page at a time.

    Each search page (up to 50 results) is resolved into full video resources
    with a second ``videos.list`` call requesting ``snippet`` and ``statistics``.

    :param param: query string passed as ``q`` to the ``search.list`` request.
    :param max_pages: maximum number of search result pages to fetch.
    :return: an iterator yielding, per search page, the ``items`` list of the
        corresponding ``videos.list`` response. Pages without any video id
        are skipped.
    """
    youtube = build("youtube", "v3", developerKey=YOUTUBE_API_KEY)
    search_request = youtube.search().list(
        part="id",
        q=param,
        type="video",
        maxResults=50,
    )
    pages_fetched = 0
    # list_next() returns None when there is no further page, which ends the loop.
    while search_request and pages_fetched < max_pages:
        search_response = search_request.execute()
        video_ids = [item["id"]["videoId"] for item in search_response["items"]]
        if video_ids:
            videos_response = youtube.videos().list(
                part="snippet,statistics",
                id=",".join(video_ids),
            ).execute()
            yield videos_response["items"]
        # else: nothing to look up; avoid sending a request with an empty id filter.

        # list_next lives on the resource returned by search(), not on the
        # search method itself, so the resource has to be instantiated here.
        search_request = youtube.search().list_next(search_request, search_response)
        pages_fetched += 1
def save_to_mongodb(collection: Collection, items: List[dict]):
    """
    Upsert a page of video items into a MongoDB collection.

    The items are modified in place before being written: each item's ``id`` is
    copied to ``_id`` (so it becomes the document key) and every value under
    ``statistics`` is converted to ``int`` (so it can be sorted numerically).

    :param collection: collection that receives the documents via ``bulk_write``.
    :param items: video items, each providing ``id`` and ``statistics`` keys.
        An empty list is a no-op.
    """
    if not items:
        # bulk_write() refuses an empty operation list, so there is nothing to do.
        return

    for item in items:
        item["_id"] = item["id"]
        stats = item["statistics"]
        # Only existing keys are reassigned, so mutating while iterating is safe.
        for key, value in stats.items():
            stats[key] = int(value)

    operations = [ReplaceOne({"_id": item["_id"]}, item, upsert=True) for item in items]
    result = collection.bulk_write(operations)
    logging.info("upserted %s documents", result.upserted_count)
def show_top_videos(collection: Collection, top: int):
    """
    Print the view count and title of the most-viewed stored videos.

    The ranking is computed from the documents in the collection, sorted by
    ``statistics.viewCount`` in descending order, rather than requested from
    the YouTube API.

    :param collection: collection holding the stored video documents.
    :param top: maximum number of videos to print.
    """
    cursor = collection.find()
    ranked = cursor.sort("statistics.viewCount", DESCENDING)
    for video in ranked.limit(top):
        views = video["statistics"]["viewCount"]
        title = video["snippet"]["title"]
        print(views, title)
def main():
    """
    Search YouTube for "YOUTUBEAPI", store the results and print the top five.

    Connects to MongoDB on localhost:27017 and works on the ``videos``
    collection of the ``youtube`` database.
    """
    # The context manager closes the client connection even if a step fails.
    with MongoClient("localhost", 27017) as mongo:
        collection = mongo.youtube.videos
        # search_videos yields one list of video items per search page.
        for page_items in search_videos("YOUTUBEAPI"):
            save_to_mongodb(collection, page_items)
        show_top_videos(collection, 5)


# Run only when executed as a script, not when imported as a module.
if __name__ == "__main__":
    # INFO level so that logging.info messages are emitted.
    logging.basicConfig(level=logging.INFO)
    main()