-
Notifications
You must be signed in to change notification settings - Fork 0
/
Movie_Data.py
44 lines (37 loc) · 1.8 KB
/
Movie_Data.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
'''
Created on Jan 5, 2021
@author: Alex Kim
Analyzes a movie metadata set, allowing users to observe trends between a
director's films and the films' respective IMDB scores.
'''
#Importing needed packages
import pandas as pd
import plotly.express as px
# Script flow: load the movie metadata CSV, ask for a director's name, chart
# that director's films against their IMDB scores, then report the highest-
# and lowest-rated film.

# Read in the movie data set.
df = pd.read_csv('movie_metadata.csv')
# Drop every row that has a missing value in any column.
# NOTE(review): this also discards films whose only gap is in a column this
# script never reads; consider dropna(subset=[...]) if that is unintended.
df = df.dropna()

# Ask the user for a director's name. raw_input only exists on Python 2, so
# fall back to input on Python 3, where the bare raw_input call raised
# NameError.
try:
    read_line = raw_input  # noqa: F821 - defined on Python 2 only
except NameError:
    read_line = input
# Strip surrounding whitespace so a stray space does not prevent a match.
director1 = read_line("What Director's work would you like to analyze? ").strip()

# Subset the data to only the films directed by the given director.
directorWork = df[df.director_name == director1]

# Stop early when nothing matches: otherwise an empty chart is shown and the
# best/worst lookup below fails with an opaque error on an empty selection.
if directorWork.empty:
    raise SystemExit("No films found for director '{}'.".format(director1))

# Create a bar chart of film title vs. IMDB score for the chosen director.
# NOTE(review): the columns are passed as Series (not column names), and the
# labels are keyed on 'x'/'y' - presumably that is what makes the axis labels
# apply; confirm against the plotly express documentation before changing.
fig = px.bar(
    df,
    x=directorWork.movie_title,
    y=directorWork.imdb_score,
    hover_name=directorWork.movie_title,
    color=directorWork.imdb_score,
    title='{} Films vs. {} Film IMDB Scores'.format(director1, director1),
    labels={'x': '{} Movie'.format(director1), 'y': 'IMDB Score'},
    text=directorWork.imdb_score,
)
# Disable hover output and draw each score as a label outside its bar.
fig.update_traces(hoverinfo='skip', hovertemplate=None, textposition='outside')

# Display the bar chart. show() is called for its side effect only; wrapping
# it in print() merely emitted a stray "None".
fig.show()

# Find the movie with the highest rating and the movie with the lowest rating.
# idxmax/idxmin return the index label of the first extreme value, replacing
# the full sort that was only used to read its first element.
bmIndex = directorWork.imdb_score.idxmax()
bestMovie = directorWork.loc[bmIndex].movie_title
bmScore = directorWork.loc[bmIndex].imdb_score
wmIndex = directorWork.imdb_score.idxmin()
worstMovie = directorWork.loc[wmIndex].movie_title
wmScore = directorWork.loc[wmIndex].imdb_score

# Print the director's highest rated film and lowest rated film.
print("{}'s highest rated film is {} with an IMDB score of {}".format(director1, str(bestMovie), str(bmScore)))
print("{}'s lowest rated film is {} with an IMDB score of {}".format(director1, str(worstMovie), str(wmScore)))