You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
116 lines
4.4 KiB
116 lines
4.4 KiB
import requests
|
|
import pandas as pd
|
|
|
|
class Omdb:
    '''
    Python client for the OMDb API. Please obtain an API key here:
    http://www.omdbapi.com/apikey.aspx

    To use, instantiate the class like so:
        my_call = Omdb(movies, 'e55xe9e9')

    -- Instantiating the class
    movies<dataframe>: a dataframe. Must have the columns 'year' and 'title'.
        If your df differs, set the instance attributes self.yearcol
        and self.titlecol appropriately.
    'e55xe9e9'<string>: this is an example api key
    timeout<number>: optional, seconds to wait for each HTTP request
        (defaults to 10).
    instance attributes: the API parameter names and JSON keys are stored
        as attributes as a convenience, to allow the user to avoid
        refactoring the class as the API evolves.

    -- Calling the API
    Use the .get_ratings() method. It will look up the movie by year
    and title and return the IMDB ID. It will then make another call to the
    API, by IMDB ID, which is needed to retrieve the ratings details. There
    is no way at the time of writing to call the API with more than one
    movie. Also, note that each movie rating requires 2 calls to the API.
    A free token at the time of writing allows 1,000 calls per day.
    Therefore, you may retrieve a maximum of 500 movie ratings per day with
    this script and a free API token.
    '''

    def __init__(self, df, apikey, timeout=10):
        '''
        Store a copy of the input dataframe, the API key and the API settings.

        df: dataframe holding one movie per row.
        apikey: OMDb API key sent with every request.
        timeout: seconds passed to requests.get as its timeout.
        '''
        # public user variables
        self.df = df.copy()  # copy so the caller's dataframe is never mutated
        self.apikey = apikey
        self.timeout = timeout

        # user-adjustable column names of the input dataframe
        self.yearcol = 'year'
        self.titlecol = 'title'

        # api variables: endpoint and query-string parameter names
        # NOTE(review): plain HTTP sends the API key unencrypted; consider
        # switching to https if the service supports it.
        self.endpoint = 'http://www.omdbapi.com/'
        self.apikey_param = 'apikey'
        self.search_param = 's'
        self.year_param = 'y'
        self.id_param = 'i'

        # RATINGS: keys used to parse the JSON response in get_ratings()
        self.imdbRating = 'imdbRating'
        self.Ratings = 'Ratings'
        self.Source = 'Source'
        self.Value = 'Value'

        # ID: keys used to parse the JSON response in __get_imdb_id()
        self.Search = 'Search'
        # take the first result, this is a hack since we are not checking
        # for multiple results
        self.imdb_id_idx = 0
        self.imdbID = 'imdbID'

    def _query(self, params):
        '''
        Private method. Sends one GET request to the endpoint with the API
        key plus the given parameters and returns the decoded JSON body.
        '''
        query = {self.apikey_param: self.apikey}
        query.update(params)
        # A timeout keeps one stalled connection from hanging the whole run.
        response = requests.get(self.endpoint, params=query,
                                timeout=self.timeout)
        return response.json()

    def _parse_ratings(self, payload):
        '''
        Private method. Maps each rating source in an API response to its
        value. Returns whatever could be parsed; an empty dict when the
        response carries no usable ratings.
        '''
        ratings = {}
        try:
            for entry in payload[self.Ratings]:
                ratings[entry[self.Source]] = entry[self.Value]
        except (KeyError, TypeError):
            # Best effort: keep the sources parsed before the malformed one.
            pass
        return ratings

    def __get_imdb_id(self, year, title):
        '''
        Private method. Retrieves the IMDB ID for the movie
        which is needed to retrieve the detailed rating review numbers.

        Raises KeyError or IndexError when the search response does not
        contain the expected result entry.
        '''
        found = self._query({self.search_param: title,
                             self.year_param: year})
        return found[self.Search][self.imdb_id_idx][self.imdbID]

    def get_ratings(self):
        '''
        Returns: dataframe with all ratings from every source for each movie.
        Result is joined with the original dataframe from which the class
        was instantiated, with the ratings columns added to the end.
        Datatype and source may vary as the API evolves.

        Movies whose lookup fails are skipped, so their ratings are missing
        in the result. If no movie could be looked up at all, a copy of the
        original dataframe is returned without any ratings columns.
        '''
        buf = {}
        rows = zip(self.df.index,
                   self.df[self.yearcol],
                   self.df[self.titlecol])
        for idx, year, title in rows:
            # Two API calls per movie: search for the ID, then fetch the
            # verbose rating information by ID.
            try:
                imdb_id = self.__get_imdb_id(year, title)
                res = self._query({self.id_param: imdb_id})
            except (requests.RequestException, ValueError,
                    KeyError, IndexError, TypeError):
                # Network failure, non-JSON body or movie not found: skip
                # this row. KeyboardInterrupt is deliberately not caught so
                # a long run can still be aborted.
                continue
            buf[idx] = self._parse_ratings(res)

        if not buf:
            # Nothing retrieved, so there is nothing to join.
            return self.df.copy()

        return pd.merge(self.df,
                        pd.DataFrame.from_dict(buf, orient='index'),
                        how='left',
                        left_index=True,
                        right_index=True)
|
|
|
|
|
|
def main():
    '''
    Placeholder entry point. This module is meant to be imported, so
    running it directly only prints a notice.
    '''
    notice = 'Not intended to be run as a script, exiting...'
    print(notice)
|
|
|
|
# Entry-point guard: call main() only when the file is executed directly,
# never when it is imported as a module.
if __name__ == "__main__":
    main()
|