Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Applying some changes and modifications #17

Open
wants to merge 18 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 20 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,9 +1,25 @@
# movie-py-cli
A simple script for fetching movie details right from the terminal

A simple script for fetching movie details right from the terminal.

# Requirements

1. Requests
2. BeautifulSoup4
3. Colorama
# Usage
![Screenshot](https://user-images.githubusercontent.com/29385192/36164961-073b7642-1114-11e8-93b3-6718b54bc00b.PNG)

# Packages Installation

Simply install the required packages with pip as below:

$ pip install -r requirements.txt

# How to Use:

To use it, change directory to the main folder (movie-py-cli) and run the script as shown below, passing the title of your desired movie to retrieve its information.
(Note that this script is written for Python 3.)

$ python3 movie-py.py peaceful warrior

# Screenshot

![Screenshot from 2021-04-11 19-21-03](https://user-images.githubusercontent.com/16654946/114309842-e38cf000-9afd-11eb-9461-46f8ff1e40f3.png)
187 changes: 122 additions & 65 deletions movie-py.py
Original file line number Diff line number Diff line change
@@ -1,71 +1,128 @@
from utils import bcolors, Cleaner
import sys
import time
import requests
from bs4 import BeautifulSoup
import time
from colorama import init,Fore
init(convert=True)
from itertools import cycle
import threading


def animated_progress():
    """Draw a braille spinner on the current line until ``stop`` becomes true.

    Intended to run in a background thread. The module-level ``stop`` flag
    is polled after every frame; once it is truthy the spinner line is
    overwritten with a completion message and the function returns.

    Returns:
        None. All output goes to stdout.
    """
    global stop
    print()
    steps = ["⢿", "⣻", "⣽", "⣾", "⣷", "⣯", "⣟", "⡿"]
    for step in cycle(steps):
        # '\r' returns to column 0 so each frame overwrites the previous one.
        print(bcolors.BLUE + bcolors.ITALIC + f'\rScrapping... {step}' + bcolors.ENDC, end='', flush=True)
        if stop:
            print(bcolors.BLUE + bcolors.ITALIC + "\rScrapping Done!" + bcolors.ENDC, flush=True)
            print()
            break
        # Pace the animation: without a pause this loop busy-spins a CPU
        # core and floods the terminal while the main thread is waiting on
        # the network. 0.1 s keeps the spinner smooth and still lets the
        # thread notice ``stop`` well within the 1 s grace period the
        # caller allows after setting it.
        time.sleep(0.1)

# Entry point: starts the spinner thread, looks up the movie named on the
# command line on IMDb, scrapes its detail page and prints the collected fields.
# Exits with status 1 when no title is given or when any step raises.
def main():
# Call the `animated_progress` func in a different thread
global stop
stop = False
t = threading.Thread(target=animated_progress)
t.start()
# Variable definition for 'txt-block' to prevent NameError exception
# (each of these is only reassigned below if the matching heading is found)
movie_budget = 'N/A'
movie_opening = 'N/A'
movie_usa_gross = 'N/A'
movie_world_gross = 'N/A'
movie_ratio = 'N/A'
movie_tag_lines = 'N/A'
movie_also_known = 'N/A'
movie_country = 'N/A'
try:
# args = 'peaceful warrior'
# User's desired movie
args = sys.argv[1:]
if not args:
# Stop the spinner and give it a moment to print its final line before
# writing the warning. SystemExit is not an `Exception` subclass, so the
# handler at the bottom does not intercept this exit.
stop = True
time.sleep(1)
print(bcolors.WARNING + bcolors.BOLD + 'NO FILM TITLE IS PROVIDED!' + bcolors.ENDC)
sys.exit(1)
else:
movie = (' ').join(args)
# Scrape the page and do the assignments
# NOTE(review): the title is concatenated into the query string as-is (no URL
# encoding) — confirm this is acceptable for titles containing '&', '#', etc.
page = requests.get('https://www.imdb.com/find?ref_=nv_sr_fn&q=' + movie + '&s=tt')
soup1 = BeautifulSoup(page.content, 'html.parser')
# Only the first search hit is used; an empty result list raises IndexError,
# which is reported by the `except Exception` handler below.
movie_id = soup1.select(".findList tr a")[0].get('href')
movie_link = "http://www.imdb.com" + movie_id
mlink_page = requests.get(movie_link)
soup2 = BeautifulSoup(mlink_page.content, 'html.parser')
title_nyear = soup2.select(".title_wrapper h1")[0].text
# Fixed-offset slicing: title is everything but the last 8 characters, year is
# the 4 characters ending 2 from the end — presumably the heading ends with
# "(YYYY)" plus trailing characters; TODO confirm against the page markup.
movie_title = title_nyear[0:len(title_nyear) - 8]
movie_year = title_nyear[len(title_nyear) - 6:len(title_nyear) - 2]
# Optional fields: fall back to None (field is skipped when printing) or 'N/A'.
movie_rating = soup2.select(".ratingValue span")[0].text if soup2.select(".ratingValue span") else None
meta_score = soup2.select(".metacriticScore")
meta_score = meta_score[0].text.strip() if meta_score else None
content_rating = soup2.find('meta',{'itemprop':'contentRating'})
content_rating = content_rating['content'].strip() if content_rating else None
movie_length = soup2.select(".subtext time")[0].text.strip() if soup2.select(".subtext time") else 'N/A'
# The ".subtext a" links are treated as genres followed by the release date:
# the last entry is taken as the date, all earlier ones as genres.
genres_ndate = [i.text for i in soup2.select(".subtext a")]
release_date = genres_ndate[-1].strip()
# Walk the "txt-block" divs and pick values by their <h4> heading text.
for i in soup2.find_all("div","txt-block"):
if i.h4:
if i.h4.text == "Budget:":movie_budget = i.h4.next_element.next_element.strip()
# `[:-1]` drops one trailing character from the money figures — presumably a
# trailing separator in the markup; verify.
if i.h4.text == "Opening Weekend USA:":movie_opening = i.h4.next_element.next_element.strip()[:-1]
if i.h4.text == "Gross USA:":movie_usa_gross = i.h4.next_element.next_element.strip()[:-1]
if i.h4.text == "Cumulative Worldwide Gross:":movie_world_gross = i.h4.next_element.next_element.strip()[:-1]
if i.h4.text == "Aspect Ratio:":movie_ratio = i.h4.next_element.next_element.strip()
if i.h4.text == "Taglines:":movie_tag_lines = i.h4.next_element.next_element.strip()
if i.h4.text == "Also Known As:":movie_also_known = i.h4.next_element.next_element.strip()
if i.h4.text == "Country:":movie_country = i.h4.next_sibling.next_element.text.strip()
# Join every entry except the last (the release date) with commas; the
# leading comma produced by the loop is sliced off afterwards.
movie_genres = ""
for x in range(len(genres_ndate) - 1):
movie_genres = movie_genres + ',' + genres_ndate[x]
movie_genres = movie_genres[1:]
movie_desc = soup2.select(".summary_text")[0].text.strip()
# Movie cast extraction
movie_cast = [i.text.strip().replace('\n', ' ') for i in soup2.select(".credit_summary_item")]
# Make a dictionary of movie cast
dict_cast = {}
for item in movie_cast:
# Create a dictionary of the casts
# (key = text before the first ':', value = text after it)
dict_cast[item[:item.find(':')]] = item[item.find(':')+1:]
# Get rid of useless characters
cleaned_movie_cast = Cleaner()
movie_cast = cleaned_movie_cast.text_cleaner(dict_cast=dict_cast)
# Prefer 'Director', then 'Creator', else 'N/A'.
movie_director = movie_cast['Director'] if movie_cast.get('Director') else (movie_cast['Creator'] if movie_cast.get('Creator') else 'N/A')
movie_actors = movie_cast['Stars'] if movie_cast.get('Stars') else 'N/A'

# NOTE(review): the `Fore`-based prints and the second `try:` block further
# down use the un-underscored names (movietitle, movierating, ...) and look
# like the removed side of the diff interleaved by the page scrape — confirm
# before treating them as live code.
print(Fore.LIGHTBLACK_EX+"Loading...")
time.sleep(0.5)
print(Fore.LIGHTBLACK_EX+"Please wait...\n")
# After scraping, stop the `animated_progress` func and print the results
stop = True
time.sleep(1)

try:
args = sys.argv[1:]
movie = (' ').join(args)
page = requests.get('http://www.imdb.com/find?ref_=nv_sr_fn&q=' + movie + '&s=tt');
soup1 = BeautifulSoup(page.content, 'html.parser')
movieid = soup1.select(".findList tr a")[0].get('href')
movielink = "http://www.imdb.com" + movieid
mlinkpage = requests.get(movielink)
soup2 = BeautifulSoup(mlinkpage.content, 'html.parser')
titlenyear = soup2.select(".title_wrapper h1")[0].text
movietitle = titlenyear[0:len(titlenyear) - 8]
movieyear = titlenyear[len(titlenyear) - 6:len(titlenyear) - 2]
movierating = soup2.select(".ratingValue span")[0].text
metascore = soup2.select(".metacriticScore")
metascore = metascore[0].text.strip() if metascore else None
contentrating = soup2.find('meta',{'itemprop':'contentRating'})
contentrating = contentrating['content'].strip() if contentrating else None
movielength = soup2.select(".subtext time")[0].text.strip()
genresndate = [i.text for i in soup2.select(".subtext a")]
releasedate = genresndate[-1].strip()
moviegenres = ""
for i in soup2.find_all("div","txt-block"):
if i.h4:
if i.h4.text=="Budget:":moviebudget = i.h4.next_element.next_element.strip()
if i.h4.text=="Opening Weekend USA:":movieopening = i.h4.next_element.next_element.strip()[:-1]
if i.h4.text=="Gross USA:":movieusagross = i.h4.next_element.next_element.strip()[:-1]
if i.h4.text=="Cumulative Worldwide Gross:":movieworldgross = i.h4.next_element.next_element.strip()[:-1]
if i.h4.text=="Aspect Ratio:":movieratio = i.h4.next_element.next_element.strip()
if i.h4.text=="Taglines:":movietaglines = i.h4.next_element.next_element.strip()
if i.h4.text=="Also Known As:":moviealsoknown = i.h4.next_element.next_element.strip()
if i.h4.text=="Country:":moviecountry = i.h4.next_sibling.next_element.text.strip()
for x in range(len(genresndate) - 1):
moviegenres = moviegenres + ',' + genresndate[x]
moviegenres = moviegenres[1:]
moviedesc = soup2.select(".summary_text")[0].text.strip()
moviecast = [i.text for i in soup2.select(".credit_summary_item span a span")]
moviedirector = moviecast[0]
movieactors = moviecast[3] + ',' + moviecast[4] + ',' + moviecast[5];
# Print the results
# (rating, metascore and content rating are printed only when they were found)
print(bcolors.RED + "Title: " + bcolors.BOLD + bcolors.GREEN + movie_title + bcolors.ENDC)
if movie_rating:
print(bcolors.RED + "IMDB Rating: " + bcolors.BOLD + bcolors.GREEN + movie_rating + "/10" + bcolors.ENDC)
if meta_score:
print(bcolors.RED + "Metascore: " + bcolors.BOLD + bcolors.GREEN + meta_score + "/100" + bcolors.ENDC)
print(bcolors.RED + "Length: " + bcolors.BOLD + bcolors.GREEN + movie_length + bcolors.ENDC)
print(bcolors.RED + "Year: " + bcolors.BOLD + bcolors.GREEN + movie_year + bcolors.ENDC)
print(bcolors.RED + "Genre: " + bcolors.BOLD + bcolors.GREEN + movie_genres + bcolors.ENDC)
print(bcolors.RED + "Description: " + bcolors.ENDC + movie_desc)
print(bcolors.RED + "Release date: " + bcolors.BOLD + bcolors.GREEN + release_date + bcolors.ENDC)
if content_rating:
print(bcolors.RED + "Rating: " + bcolors.BOLD + bcolors.GREEN + content_rating + bcolors.ENDC)
print(bcolors.RED + "Director: " + bcolors.BOLD + bcolors.GREEN + movie_director + bcolors.ENDC)
print(bcolors.RED + "Lead Cast: " + bcolors.BOLD + bcolors.GREEN + movie_actors + bcolors.ENDC)
print(bcolors.RED + "Country: " + bcolors.BOLD + bcolors.GREEN + movie_country + bcolors.ENDC)
print(bcolors.RED + "Also Known As: " + bcolors.BOLD + bcolors.GREEN + movie_also_known + bcolors.ENDC)
print(bcolors.RED + "Budget: " + bcolors.BOLD + bcolors.GREEN + movie_budget + bcolors.ENDC)
print(bcolors.RED + "Opening Weekend USA: " + bcolors.BOLD + bcolors.GREEN + movie_opening + bcolors.ENDC)
print(bcolors.RED + "Gross USA: " + bcolors.BOLD + bcolors.GREEN + movie_usa_gross + bcolors.ENDC)
print(bcolors.RED + "Cumulative Worldwide Gross: " + bcolors.BOLD + bcolors.GREEN + movie_world_gross + bcolors.ENDC)
print(bcolors.RED + "Ratio: " + bcolors.BOLD + bcolors.GREEN + movie_ratio + bcolors.ENDC)
print(bcolors.RED + "Taglines: " + bcolors.BOLD + bcolors.GREEN + movie_tag_lines + bcolors.ENDC)
print(bcolors.RED + "URL: " + bcolors.BOLD + bcolors.GREEN + movie_link + bcolors.ENDC)
# Any exception raised above ends up here: stop the spinner, wait for it to
# print its final line, show the error text and exit with a non-zero status.
except Exception as e:
stop = True
time.sleep(1)
print(bcolors.ITALIC + bcolors.WARNING + "Something goes wrong!\nError: {}".format(e), bcolors.ENDC)
sys.exit(1)

print(Fore.LIGHTRED_EX + "Title: " + Fore.LIGHTGREEN_EX + movietitle)
print(Fore.LIGHTRED_EX + "IMDB Rating: " + Fore.LIGHTYELLOW_EX + movierating + "/10")
if metascore: print(Fore.LIGHTRED_EX + "Metascore: " + Fore.LIGHTYELLOW_EX + metascore + "/100")
print(Fore.LIGHTRED_EX + "Length: " + Fore.LIGHTCYAN_EX + movielength)
print(Fore.LIGHTRED_EX + "Year: " + Fore.LIGHTMAGENTA_EX + movieyear)
print(Fore.LIGHTRED_EX + "Genre: " + Fore.LIGHTBLUE_EX + moviegenres)
print(Fore.LIGHTRED_EX + "Description: " + Fore.LIGHTWHITE_EX + moviedesc)
print(Fore.LIGHTRED_EX + "Release date: " + Fore.LIGHTCYAN_EX + releasedate)
if contentrating: print(Fore.LIGHTRED_EX + "Rating: " + Fore.LIGHTCYAN_EX + contentrating)
print(Fore.LIGHTRED_EX + "Director: " + Fore.LIGHTBLACK_EX + moviedirector)
print(Fore.LIGHTRED_EX + "Lead Cast: " + Fore.LIGHTBLACK_EX + movieactors)
print(Fore.LIGHTRED_EX + "Country: " + Fore.LIGHTBLUE_EX + moviecountry)
print(Fore.LIGHTRED_EX + "Also Known As: " + Fore.LIGHTBLUE_EX + moviealsoknown)
print(Fore.LIGHTRED_EX + "Budget: " + Fore.LIGHTBLUE_EX + moviebudget)
print(Fore.LIGHTRED_EX + "Opening Weekend USA: " + Fore.LIGHTBLUE_EX + movieopening)
print(Fore.LIGHTRED_EX + "Gross USA: " + Fore.LIGHTBLUE_EX + movieusagross)
print(Fore.LIGHTRED_EX + "Cumulative Worldwide Gross: " + Fore.LIGHTBLUE_EX + movieworldgross)
print(Fore.LIGHTRED_EX + "Ratio: " + Fore.LIGHTBLUE_EX + movieratio)
print(Fore.LIGHTRED_EX + "Taglines: " + Fore.LIGHTBLUE_EX + movietaglines)
except:
print(Fore.LIGHTRED_EX+"Something's wrong,Try Again Later")
# Run the scraper only when this file is executed as a script, not on import.
if __name__ == '__main__':
main()
14 changes: 7 additions & 7 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
beautifulsoup4==4.6.0
beautifulsoup4==4.9.3
bs4==0.0.1
certifi==2018.1.18
chardet==3.0.4
colorama==0.3.9
idna==2.6
certifi==2020.12.5
chardet==4.0.0
idna==2.10
pkg-resources==0.0.0
requests==2.18.4
urllib3==1.22
requests==2.25.1
soupsieve==2.2.1
urllib3==1.26.4
42 changes: 42 additions & 0 deletions utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
class bcolors:
    """ANSI escape sequences for styling terminal output.

    Prefix text with one or more of these attributes and finish with
    ``ENDC`` to reset the terminal back to its default rendition.

    Source: https://stackoverflow.com/a/42449998/12537848
    """

    # Foreground colours
    HEADER = "\033[95m"
    RED = "\033[91m"
    GREEN = "\033[92m"
    BLUE = "\033[94m"
    WARNING = "\033[93m"

    # Text attributes
    BOLD = "\033[1m"
    ITALIC = "\033[3m"
    UNDERLINE = "\033[4m"

    # Reset every colour/attribute set above
    ENDC = "\033[0m"


class Cleaner:
    '''
    Get rid of useless contents
    '''

    def __init__(self):
        pass

    def text_cleaner(self, dict_cast):
        '''
        Gets rid of useless characters

        For every entry, keeps only the text before the first ``|`` and
        strips surrounding whitespace. Values without a ``|`` are kept
        whole (only stripped).

        Input:
            :param: dict_cast -> A dictionary of movie casts to be gotten rid of the useless characters.
            :type: dict

        Returns:
            A new dictionary with the same keys and cleaned string values
        '''
        cleaned_text = {}
        for role, raw in dict_cast.items():
            # ``partition`` returns the whole string as the head when the
            # separator is absent. The previous ``find``-based slice used
            # ``[:-1]`` in that case and silently dropped the last character.
            head, _, _ = raw.partition('|')
            cleaned_text[role] = head.strip()

        return cleaned_text