{ "cells": [ { "cell_type": "code", "execution_count": 348, "metadata": {}, "outputs": [], "source": [ "import warnings\n", "warnings.filterwarnings('ignore')\n", "import numpy as np\n", "import matplotlib.pyplot as plt\n", "%matplotlib inline\n", "import pandas as pd\n", "from scipy import stats\n", "from scipy import stats, special\n", "from sklearn import model_selection, metrics, linear_model, datasets, feature_selection\n", "from sklearn import neighbors\n", "from sklearn.preprocessing import StandardScaler\n", "import time\n", "from scipy import io\n", "from sklearn.preprocessing import StandardScaler\n", "from sklearn.model_selection import train_test_split\n", "from sklearn.linear_model import LogisticRegression\n", "from sklearn.model_selection import cross_val_score, KFold\n", "from sklearn.neighbors import KNeighborsClassifier\n", "from sklearn.tree import DecisionTreeClassifier\n", "from sklearn.metrics import confusion_matrix\n", "from sklearn.model_selection import cross_validate\n", "from sklearn.preprocessing import LabelEncoder\n", "from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer\n", "import json\n", "import seaborn as sns\n", "import ast" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Exploratory Data Analysis (EDA)" ] }, { "cell_type": "code", "execution_count": 349, "metadata": {}, "outputs": [], "source": [ "train = pd.read_csv('train.csv')" ] }, { "cell_type": "code", "execution_count": 350, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(3000, 23)" ] }, "execution_count": 350, "metadata": {}, "output_type": "execute_result" } ], "source": [ "train.shape" ] }, { "cell_type": "code", "execution_count": 351, "metadata": { "scrolled": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
idbelongs_to_collectionbudgetgenreshomepageimdb_idoriginal_languageoriginal_titleoverviewpopularity...release_dateruntimespoken_languagesstatustaglinetitleKeywordscastcrewrevenue
01[{'id': 313576, 'name': 'Hot Tub Time Machine ...14000000[{'id': 35, 'name': 'Comedy'}]NaNtt2637294enHot Tub Time Machine 2When Lou, who has become the \"father of the In...6.575393...2/20/1593.0[{'iso_639_1': 'en', 'name': 'English'}]ReleasedThe Laws of Space and Time are About to be Vio...Hot Tub Time Machine 2[{'id': 4379, 'name': 'time travel'}, {'id': 9...[{'cast_id': 4, 'character': 'Lou', 'credit_id...[{'credit_id': '59ac067c92514107af02c8c8', 'de...12314651
12[{'id': 107674, 'name': 'The Princess Diaries ...40000000[{'id': 35, 'name': 'Comedy'}, {'id': 18, 'nam...NaNtt0368933enThe Princess Diaries 2: Royal EngagementMia Thermopolis is now a college graduate and ...8.248895...8/6/04113.0[{'iso_639_1': 'en', 'name': 'English'}]ReleasedIt can take a lifetime to find true love; she'...The Princess Diaries 2: Royal Engagement[{'id': 2505, 'name': 'coronation'}, {'id': 42...[{'cast_id': 1, 'character': 'Mia Thermopolis'...[{'credit_id': '52fe43fe9251416c7502563d', 'de...95149435
23NaN3300000[{'id': 18, 'name': 'Drama'}]http://sonyclassics.com/whiplash/tt2582802enWhiplashUnder the direction of a ruthless instructor, ...64.299990...10/10/14105.0[{'iso_639_1': 'en', 'name': 'English'}]ReleasedThe road to greatness can take you to the edge.Whiplash[{'id': 1416, 'name': 'jazz'}, {'id': 1523, 'n...[{'cast_id': 5, 'character': 'Andrew Neimann',...[{'credit_id': '54d5356ec3a3683ba0000039', 'de...13092000
34NaN1200000[{'id': 53, 'name': 'Thriller'}, {'id': 18, 'n...http://kahaanithefilm.com/tt1821480hiKahaaniVidya Bagchi (Vidya Balan) arrives in Kolkata ...3.174936...3/9/12122.0[{'iso_639_1': 'en', 'name': 'English'}, {'iso...ReleasedNaNKahaani[{'id': 10092, 'name': 'mystery'}, {'id': 1054...[{'cast_id': 1, 'character': 'Vidya Bagchi', '...[{'credit_id': '52fe48779251416c9108d6eb', 'de...16000000
45NaN0[{'id': 28, 'name': 'Action'}, {'id': 53, 'nam...NaNtt1380152ko마린보이Marine Boy is the story of a former national s...1.148070...2/5/09118.0[{'iso_639_1': 'ko', 'name': '한국어/조선말'}]ReleasedNaNMarine BoyNaN[{'cast_id': 3, 'character': 'Chun-soo', 'cred...[{'credit_id': '52fe464b9251416c75073b43', 'de...3923970
\n", "

5 rows × 23 columns

\n", "
" ], "text/plain": [ " id belongs_to_collection budget \\\n", "0 1 [{'id': 313576, 'name': 'Hot Tub Time Machine ... 14000000 \n", "1 2 [{'id': 107674, 'name': 'The Princess Diaries ... 40000000 \n", "2 3 NaN 3300000 \n", "3 4 NaN 1200000 \n", "4 5 NaN 0 \n", "\n", " genres \\\n", "0 [{'id': 35, 'name': 'Comedy'}] \n", "1 [{'id': 35, 'name': 'Comedy'}, {'id': 18, 'nam... \n", "2 [{'id': 18, 'name': 'Drama'}] \n", "3 [{'id': 53, 'name': 'Thriller'}, {'id': 18, 'n... \n", "4 [{'id': 28, 'name': 'Action'}, {'id': 53, 'nam... \n", "\n", " homepage imdb_id original_language \\\n", "0 NaN tt2637294 en \n", "1 NaN tt0368933 en \n", "2 http://sonyclassics.com/whiplash/ tt2582802 en \n", "3 http://kahaanithefilm.com/ tt1821480 hi \n", "4 NaN tt1380152 ko \n", "\n", " original_title \\\n", "0 Hot Tub Time Machine 2 \n", "1 The Princess Diaries 2: Royal Engagement \n", "2 Whiplash \n", "3 Kahaani \n", "4 마린보이 \n", "\n", " overview popularity ... \\\n", "0 When Lou, who has become the \"father of the In... 6.575393 ... \n", "1 Mia Thermopolis is now a college graduate and ... 8.248895 ... \n", "2 Under the direction of a ruthless instructor, ... 64.299990 ... \n", "3 Vidya Bagchi (Vidya Balan) arrives in Kolkata ... 3.174936 ... \n", "4 Marine Boy is the story of a former national s... 1.148070 ... \n", "\n", " release_date runtime spoken_languages \\\n", "0 2/20/15 93.0 [{'iso_639_1': 'en', 'name': 'English'}] \n", "1 8/6/04 113.0 [{'iso_639_1': 'en', 'name': 'English'}] \n", "2 10/10/14 105.0 [{'iso_639_1': 'en', 'name': 'English'}] \n", "3 3/9/12 122.0 [{'iso_639_1': 'en', 'name': 'English'}, {'iso... \n", "4 2/5/09 118.0 [{'iso_639_1': 'ko', 'name': '한국어/조선말'}] \n", "\n", " status tagline \\\n", "0 Released The Laws of Space and Time are About to be Vio... \n", "1 Released It can take a lifetime to find true love; she'... \n", "2 Released The road to greatness can take you to the edge. 
\n", "3 Released NaN \n", "4 Released NaN \n", "\n", " title \\\n", "0 Hot Tub Time Machine 2 \n", "1 The Princess Diaries 2: Royal Engagement \n", "2 Whiplash \n", "3 Kahaani \n", "4 Marine Boy \n", "\n", " Keywords \\\n", "0 [{'id': 4379, 'name': 'time travel'}, {'id': 9... \n", "1 [{'id': 2505, 'name': 'coronation'}, {'id': 42... \n", "2 [{'id': 1416, 'name': 'jazz'}, {'id': 1523, 'n... \n", "3 [{'id': 10092, 'name': 'mystery'}, {'id': 1054... \n", "4 NaN \n", "\n", " cast \\\n", "0 [{'cast_id': 4, 'character': 'Lou', 'credit_id... \n", "1 [{'cast_id': 1, 'character': 'Mia Thermopolis'... \n", "2 [{'cast_id': 5, 'character': 'Andrew Neimann',... \n", "3 [{'cast_id': 1, 'character': 'Vidya Bagchi', '... \n", "4 [{'cast_id': 3, 'character': 'Chun-soo', 'cred... \n", "\n", " crew revenue \n", "0 [{'credit_id': '59ac067c92514107af02c8c8', 'de... 12314651 \n", "1 [{'credit_id': '52fe43fe9251416c7502563d', 'de... 95149435 \n", "2 [{'credit_id': '54d5356ec3a3683ba0000039', 'de... 13092000 \n", "3 [{'credit_id': '52fe48779251416c9108d6eb', 'de... 16000000 \n", "4 [{'credit_id': '52fe464b9251416c75073b43', 'de... 3923970 \n", "\n", "[5 rows x 23 columns]" ] }, "execution_count": 351, "metadata": {}, "output_type": "execute_result" } ], "source": [ "train.head()" ] }, { "cell_type": "code", "execution_count": 352, "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", 
" \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", 
" \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
idbelongs_to_collectionbudgetgenreshomepageimdb_idoriginal_languageoriginal_titleoverviewpopularity...release_dateruntimespoken_languagesstatustaglinetitleKeywordscastcrewrevenue
21352136[{'id': 295, 'name': 'Pirates of the Caribbean...380000000[{'id': 12, 'name': 'Adventure'}, {'id': 28, '...http://disney.go.com/pirates/index-on-stranger...tt1298650enPirates of the Caribbean: On Stranger TidesCaptain Jack Sparrow crosses paths with a woma...27.887720...5/14/11136.0[{'iso_639_1': 'en', 'name': 'English'}, {'iso...ReleasedLive Forever Or Die Trying.Pirates of the Caribbean: On Stranger Tides[{'id': 658, 'name': 'sea'}, {'id': 1316, 'nam...[{'cast_id': 15, 'character': 'Captain Jack Sp...[{'credit_id': '566b4f54c3a3683f56005151', 'de...1045713802
22092210[{'id': 295, 'name': 'Pirates of the Caribbean...300000000[{'id': 12, 'name': 'Adventure'}, {'id': 14, '...http://disney.go.com/disneypictures/pirates/tt0449088enPirates of the Caribbean: At World's EndCaptain Barbossa, long believed to be dead, ha...31.363664...5/19/07169.0[{'iso_639_1': 'en', 'name': 'English'}]ReleasedAt the end of the world, the adventure begins.Pirates of the Caribbean: At World's End[{'id': 270, 'name': 'ocean'}, {'id': 726, 'na...[{'cast_id': 4, 'character': 'Captain Jack Spa...[{'credit_id': '52fe4232c3a36847f800b579', 'de...961000000
27702771[{'id': 86311, 'name': 'The Avengers Collectio...280000000[{'id': 28, 'name': 'Action'}, {'id': 12, 'nam...http://marvel.com/movies/movie/193/avengers_ag...tt2395427enAvengers: Age of UltronWhen Tony Stark tries to jumpstart a dormant p...37.379420...4/22/15141.0[{'iso_639_1': 'en', 'name': 'English'}]ReleasedA New Age Has Come.Avengers: Age of Ultron[{'id': 8828, 'name': 'marvel comic'}, {'id': ...[{'cast_id': 76, 'character': 'Tony Stark / Ir...[{'credit_id': '55d5f7d4c3a3683e7e0016eb', 'de...1405403694
14251426[{'id': 8537, 'name': 'Superman Collection', '...270000000[{'id': 12, 'name': 'Adventure'}, {'id': 14, '...http://www.superman.comtt0348150enSuperman ReturnsSuperman returns to discover his 5-year absenc...13.284712...6/28/06154.0[{'iso_639_1': 'en', 'name': 'English'}, {'iso...ReleasedNaNSuperman Returns[{'id': 83, 'name': 'saving the world'}, {'id'...[{'cast_id': 3, 'character': 'Superman / Clark...[{'credit_id': '553bef6a9251416874003c8f', 'de...391081192
12701271NaN260000000[{'id': 28, 'name': 'Action'}, {'id': 12, 'nam...http://movies.disney.com/john-cartertt0401729enJohn CarterJohn Carter is a war-weary, former military ca...14.670353...3/7/12132.0[{'iso_639_1': 'en', 'name': 'English'}]ReleasedLost in our world, found in another.John Carter[{'id': 818, 'name': 'based on novel'}, {'id':...[{'cast_id': 5, 'character': 'John Carter', 'c...[{'credit_id': '52fe479ac3a36847f813eaa3', 'de...284139100
16301631[{'id': 8650, 'name': 'Transformers Collection...260000000[{'id': 28, 'name': 'Action'}, {'id': 878, 'na...http://www.transformersmovie.com/tt3371366enTransformers: The Last KnightAutobots and Decepticons are at war, with huma...39.186819...6/21/17149.0[{'iso_639_1': 'en', 'name': 'English'}]ReleasedFor one world to live, the other must die.Transformers: The Last Knight[{'id': 10466, 'name': 'knight'}, {'id': 10607...[{'cast_id': 2, 'character': 'Cade Yeager', 'c...[{'credit_id': '5553e38bc3a368208f000502', 'de...604942143
25322533[{'id': 121938, 'name': 'The Hobbit Collection...250000000[{'id': 12, 'name': 'Adventure'}, {'id': 14, '...http://www.thehobbit.com/tt0903624enThe Hobbit: An Unexpected JourneyBilbo Baggins, a hobbit enjoying his quiet lif...23.253089...11/26/12169.0[{'iso_639_1': 'en', 'name': 'English'}]ReleasedFrom the smallest beginnings come the greatest...The Hobbit: An Unexpected Journey[{'id': 483, 'name': 'riddle'}, {'id': 603, 'n...[{'cast_id': 6, 'character': 'Gandalf', 'credi...[{'credit_id': '52fe4783c3a36847f8139f7f', 'de...1021103568
961962[{'id': 1241, 'name': 'Harry Potter Collection...250000000[{'id': 12, 'name': 'Adventure'}, {'id': 14, '...http://harrypotter.warnerbros.com/harrypottera...tt0417741enHarry Potter and the Half-Blood PrinceAs Harry begins his sixth year at Hogwarts, he...19.083723...7/7/09153.0[{'iso_639_1': 'en', 'name': 'English'}]ReleasedDark Secrets RevealedHarry Potter and the Half-Blood Prince[{'id': 616, 'name': 'witch'}, {'id': 2343, 'n...[{'cast_id': 3, 'character': 'Harry Potter', '...[{'credit_id': '52fe4273c3a36847f801fab1', 'de...933959197
906907[{'id': 263, 'name': 'The Dark Knight Collecti...250000000[{'id': 28, 'name': 'Action'}, {'id': 80, 'nam...http://www.thedarkknightrises.com/tt1345836enThe Dark Knight RisesFollowing the death of District Attorney Harve...20.582580...7/16/12165.0[{'iso_639_1': 'en', 'name': 'English'}]ReleasedThe Legend EndsThe Dark Knight Rises[{'id': 849, 'name': 'dc comics'}, {'id': 853,...NaNNaN1084939099
666667[{'id': 121938, 'name': 'The Hobbit Collection...250000000[{'id': 12, 'name': 'Adventure'}, {'id': 14, '...http://www.thehobbit.com/tt1170358enThe Hobbit: The Desolation of SmaugThe Dwarves, Bilbo and Gandalf have successful...20.644776...12/11/13161.0[{'iso_639_1': 'en', 'name': 'English'}]ReleasedBeyond darkness... beyond desolation... lies t...The Hobbit: The Desolation of Smaug[{'id': 603, 'name': 'elves'}, {'id': 604, 'na...[{'cast_id': 3, 'character': 'Bilbo Baggins', ...[{'credit_id': '5350e7b0c3a3681d93000e5d', 'de...958400000
27372738[{'id': 645, 'name': 'James Bond Collection', ...245000000[{'id': 28, 'name': 'Action'}, {'id': 12, 'nam...http://www.sonypictures.com/movies/spectre/tt2379713enSpectreA cryptic message from Bond‚Äôs past sends him...24.926577...10/26/15148.0[{'iso_639_1': 'fr', 'name': 'Français'}, {'is...ReleasedA Plan No One EscapesSpectre[{'id': 470, 'name': 'spy'}, {'id': 818, 'name...[{'cast_id': 1, 'character': 'James Bond', 'cr...[{'credit_id': '5751eed59251416b60000637', 'de...880674609
12491250[{'id': 420, 'name': 'The Chronicles of Narnia...225000000[{'id': 12, 'name': 'Adventure'}, {'id': 10751...NaNtt0499448enThe Chronicles of Narnia: Prince CaspianOne year after their incredible adventures in ...12.490891...5/15/08150.0[{'iso_639_1': 'en', 'name': 'English'}]ReleasedHope has a new face.The Chronicles of Narnia: Prince Caspian[{'id': 818, 'name': 'based on novel'}, {'id':...[{'cast_id': 1, 'character': 'Prince Caspian',...[{'credit_id': '55a239e69251412979002e8b', 'de...419651413
23572358[{'id': 209131, 'name': 'Man of Steel Collecti...225000000[{'id': 28, 'name': 'Action'}, {'id': 12, 'nam...http://www.manofsteel.com/tt0770828enMan of SteelA young boy learns that he has extraordinary p...18.538834...6/12/13143.0[{'iso_639_1': 'en', 'name': 'English'}]ReleasedYou will believe that a man can fly.Man of Steel[{'id': 83, 'name': 'saving the world'}, {'id'...[{'cast_id': 2, 'character': 'Clark Kent / Kal...[{'credit_id': '52fe4799c3a36847f813e601', 'de...662845518
11261127[{'id': 86311, 'name': 'The Avengers Collectio...220000000[{'id': 878, 'name': 'Science Fiction'}, {'id'...http://marvel.com/avengers_movie/tt0848228enThe AvengersWhen an unexpected enemy emerges and threatens...89.887648...4/25/12143.0[{'iso_639_1': 'en', 'name': 'English'}]ReleasedSome assembly required.The Avengers[{'id': 242, 'name': 'new york'}, {'id': 5539,...[{'cast_id': 46, 'character': 'Tony Stark / Ir...[{'credit_id': '52fe4495c3a368484e02b1cf', 'de...1519557910
18801881[{'id': 748, 'name': 'X-Men Collection', 'post...210000000[{'id': 12, 'name': 'Adventure'}, {'id': 28, '...NaNtt0376994enX-Men: The Last StandWhen a cure is found to treat mutations, lines...0.867943...5/24/06104.0[{'iso_639_1': 'en', 'name': 'English'}]ReleasedTake a StandX-Men: The Last Stand[{'id': 1852, 'name': 'mutant'}, {'id': 8828, ...[{'cast_id': 4, 'character': 'Logan / Wolverin...[{'credit_id': '538d82720e0a26670e005e83', 'de...459359555
20352036NaN207000000[{'id': 12, 'name': 'Adventure'}, {'id': 18, '...NaNtt0360717enKing KongIn 1933 New York, an overly ambitious movie pr...19.761164...12/14/05187.0[{'iso_639_1': 'en', 'name': 'English'}]ReleasedThe eighth wonder of the world.King Kong[{'id': 774, 'name': 'film business'}, {'id': ...[{'cast_id': 5, 'character': 'Ann Darrow', 'cr...[{'credit_id': '52fe422ec3a36847f800a1d7', 'de...550000000
12221223[{'id': 87118, 'name': 'Cars Collection', 'pos...200000000[{'id': 16, 'name': 'Animation'}, {'id': 10751...http://www.disney.go.com/cars/tt1216475enCars 2Star race car Lightning McQueen and his pal Ma...13.693002...6/11/11106.0[{'iso_639_1': 'en', 'name': 'English'}, {'iso...ReleasedKa-ciao!Cars 2[{'id': 830, 'name': 'car race'}, {'id': 9663,...[{'cast_id': 4, 'character': 'Lightning McQuee...[{'credit_id': '52fe477fc3a36847f8139271', 'de...559852396
763764[{'id': 416468, 'name': 'World War Z Collectio...200000000[{'id': 28, 'name': 'Action'}, {'id': 18, 'nam...http://www.worldwarzmovie.comtt0816711enWorld War ZLife for former United Nations investigator Ge...26.114917...6/20/13116.0[{'iso_639_1': 'en', 'name': 'English'}]ReleasedRemember Philly!World War Z[{'id': 4565, 'name': 'dystopia'}, {'id': 1233...[{'cast_id': 4, 'character': 'Gerry Lane', 'cr...[{'credit_id': '570bbe9ec3a3684b50000391', 'de...531865000
18111812[{'id': 528, 'name': 'The Terminator Collectio...200000000[{'id': 28, 'name': 'Action'}, {'id': 53, 'nam...NaNtt0181852enTerminator 3: Rise of the MachinesIt's been 10 years since John Connor saved Ear...20.818907...7/2/03109.0[{'iso_639_1': 'en', 'name': 'English'}]ReleasedThe Machines Will Rise.Terminator 3: Rise of the Machines[{'id': 83, 'name': 'saving the world'}, {'id'...[{'cast_id': 6, 'character': 'The Terminator',...[{'credit_id': '52fe4233c3a36847f800bc99', 'de...435000000
881882[{'id': 261307, 'name': 'Alice in Wonderland C...200000000[{'id': 10751, 'name': 'Family'}, {'id': 14, '...http://disney.go.com/wonderland/tt1014759enAlice in WonderlandAlice, an unpretentious and individual 19-year...17.285093...3/3/10108.0[{'iso_639_1': 'en', 'name': 'English'}]ReleasedYou're invited to a very important date.Alice in Wonderland[{'id': 818, 'name': 'based on novel'}, {'id':...[{'cast_id': 7, 'character': 'Alice Kingsleigh...[{'credit_id': '52fe44c09251416c7503fbc3', 'de...1025491110
25622563[{'id': 137697, 'name': 'Finding Nemo Collecti...200000000[{'id': 12, 'name': 'Adventure'}, {'id': 16, '...http://movies.disney.com/finding-dorytt2277860enFinding DoryDory is reunited with her friends Nemo and Mar...14.477677...6/16/1697.0[{'iso_639_1': 'en', 'name': 'English'}]ReleasedAn unforgettable journey she probably won't re...Finding Dory[{'id': 1357, 'name': 'fish'}, {'id': 1453, 'n...[{'cast_id': 2, 'character': 'Dory (voice)', '...[{'credit_id': '55eb2fbb92514106d60041ab', 'de...1028570889
18381839[{'id': 645, 'name': 'James Bond Collection', ...200000000[{'id': 12, 'name': 'Adventure'}, {'id': 28, '...http://www.mgm.com/view/movie/234/Quantum-of-S...tt0830515enQuantum of SolaceQuantum of Solace continues the adventures of ...13.678120...10/30/08106.0[{'iso_639_1': 'en', 'name': 'English'}, {'iso...ReleasedFor love, for hate, for justice, for revenge.Quantum of Solace[{'id': 627, 'name': 'killing'}, {'id': 1568, ...[{'cast_id': 1, 'character': 'James Bond', 'cr...[{'credit_id': '52fe43b29251416c7501aa63', 'de...586090727
21262127[{'id': 284433, 'name': 'Guardians of the Gala...200000000[{'id': 28, 'name': 'Action'}, {'id': 12, 'nam...http://marvel.com/movies/movie/221/guardians_o...tt3896198enGuardians of the Galaxy Vol. 2The Guardians must fight to keep their newfoun...185.330992...4/19/17137.0[{'iso_639_1': 'en', 'name': 'English'}]ReleasedObviously.Guardians of the Galaxy Vol. 2[{'id': 9663, 'name': 'sequel'}, {'id': 9715, ...[{'cast_id': 3, 'character': 'Peter Quill / St...[{'credit_id': '59171547925141583c0315a6', 'de...863416141
26232624NaN197471676[{'id': 12, 'name': 'Adventure'}, {'id': 878, ...NaNtt2239822enValerian and the City of a Thousand PlanetsIn the 28th century, Valerian and Laureline ar...15.262706...7/20/17137.0[{'iso_639_1': 'fr', 'name': 'Français'}, {'is...ReleasedNaNValerian and the City of a Thousand Planets[{'id': 1370, 'name': 'shapeshifting'}, {'id':...NaNNaN90024292
927928NaN195000000[{'id': 28, 'name': 'Action'}, {'id': 10751, '...http://jackthegiantkiller.warnerbros.comtt1351685enJack the Giant SlayerThe story of an ancient war that is reignited ...11.606061...2/27/13114.0[{'iso_639_1': 'en', 'name': 'English'}]ReleasedPrepare for a giant adventureJack the Giant Slayer[{'id': 179411, 'name': 'based on fairy tale'}...[{'cast_id': 7, 'character': 'Jack', 'credit_i...[{'credit_id': '52fe47ef9251416c9107aa0f', 'de...197687603
23222323[{'id': 8650, 'name': 'Transformers Collection...195000000[{'id': 28, 'name': 'Action'}, {'id': 878, 'na...http://www.transformersmovie.com/tt1399103enTransformers: Dark of the MoonSam Witwicky takes his first tenuous steps int...4.503505...6/28/11154.0[{'iso_639_1': 'en', 'name': 'English'}]ReleasedThe invasion we always feared. An enemy we nev...Transformers: Dark of the Moon[{'id': 305, 'name': 'moon'}, {'id': 1612, 'na...[{'cast_id': 3, 'character': 'Sam Witwicky', '...[{'credit_id': '537f0b740e0a2624b40044d0', 'de...1123746996
13941395NaN190000000[{'id': 12, 'name': 'Adventure'}, {'id': 10751...http://movies.disney.com/tomorrowlandtt1964418enTomorrowlandBound by a shared destiny, a bright, optimisti...22.296076...5/19/15130.0[{'iso_639_1': 'en', 'name': 'English'}]ReleasedImagine a world where nothing is impossible.Tomorrowland[{'id': 1436, 'name': 'inventor'}, {'id': 1233...[{'cast_id': 17, 'character': 'Casey Newton', ...[{'credit_id': '55bb85e6c3a36869b400723c', 'de...209154322
17611762[{'id': 9485, 'name': 'The Fast and the Furiou...190000000[{'id': 28, 'name': 'Action'}]http://www.furious7.com/tt2820852enFurious 7Deckard Shaw seeks revenge against Dominic Tor...27.275687...4/1/15137.0[{'iso_639_1': 'en', 'name': 'English'}]ReleasedVengeance Hits HomeFurious 7[{'id': 830, 'name': 'car race'}, {'id': 3428,...[{'cast_id': 17, 'character': 'Dominic Toretto...[{'credit_id': '52fe4cc8c3a36847f823e681', 'de...1506249360
16731674[{'id': 263, 'name': 'The Dark Knight Collecti...185000000[{'id': 18, 'name': 'Drama'}, {'id': 28, 'name...http://thedarkknight.warnerbros.com/dvdsite/tt0468569enThe Dark KnightBatman raises the stakes in his war on crime. ...123.167259...7/16/08152.0[{'iso_639_1': 'en', 'name': 'English'}, {'iso...ReleasedWhy So Serious?The Dark Knight[{'id': 849, 'name': 'dc comics'}, {'id': 853,...[{'cast_id': 35, 'character': 'Bruce Wayne / B...[{'credit_id': '55a0eb4a925141296b0010f8', 'de...1004558444
902903[{'id': 84, 'name': 'Indiana Jones Collection'...185000000[{'id': 12, 'name': 'Adventure'}, {'id': 28, '...http://www.indianajones.com/site/index.htmltt0367882enIndiana Jones and the Kingdom of the Crystal S...Set during the Cold War, the Soviets – led b...12.577266...5/21/08122.0[{'iso_639_1': 'en', 'name': 'English'}, {'iso...ReleasedThe adventure continues . . .Indiana Jones and the Kingdom of the Crystal S...[{'id': 83, 'name': 'saving the world'}, {'id'...[{'cast_id': 4, 'character': 'Indiana Jones', ...[{'credit_id': '52fe4227c3a36847f800847d', 'de...786636033
..................................................................
24842485[{'id': 10457, 'name': 'Once Upon a Time in Ch...0[{'id': 28, 'name': 'Action'}, {'id': 35, 'nam...NaNtt0105839cn黃飛鴻之二男兒當自強In the sequel to the Tsui Hark classic, Wong F...3.606688...9/16/92113.0[{'iso_639_1': 'cn', 'name': '广州话 / 廣州話'}, {'i...ReleasedNaNOnce Upon a Time in China II[{'id': 478, 'name': 'china'}, {'id': 779, 'na...[{'cast_id': 13, 'character': 'Wong Fei-hung',...[{'credit_id': '52fe43959251416c75016245', 'de...30399676
152153NaN0[{'id': 18, 'name': 'Drama'}, {'id': 14, 'name...NaNtt0098061enPaperhouseAnna is becoming lost in the loneliness of her...3.520171...1/1/8892.0[{'iso_639_1': 'en', 'name': 'English'}]ReleasedA drawing that became a dream. A dream that be...Paperhouse[{'id': 3030, 'name': 'nightmare'}, {'id': 609...[{'cast_id': 9, 'character': 'Anna Madden', 'c...[{'credit_id': '52fe45e5c3a36847f80e0a31', 'de...241
151152NaN0[{'id': 99, 'name': 'Documentary'}]NaNtt5278464enHoly HellAn inside look at a West Hollywood cult formed...1.279443...1/25/16100.0[{'iso_639_1': 'en', 'name': 'English'}]ReleasedNaNHoly Hell[{'id': 162766, 'name': 'religious cult'}, {'i...[{'cast_id': 7, 'character': 'Himself', 'credi...[{'credit_id': '57cab4999251411ac500279a', 'de...16905
15951596NaN0[{'id': 53, 'name': 'Thriller'}, {'id': 878, '...NaNtt2866360enCoherenceOn the night a comet is passing near Earth, ei...6.851243...9/19/1389.0[{'iso_639_1': 'en', 'name': 'English'}]ReleasedNothing is randomCoherence[{'id': 2340, 'name': 'paranoia'}, {'id': 4803...[{'cast_id': 10, 'character': 'Hugh', 'credit_...[{'credit_id': '54e1c53fc3a36845410081dc', 'de...102617
16381639[{'id': 102019, 'name': 'Death Note Collection...0[{'id': 14, 'name': 'Fantasy'}, {'id': 28, 'na...http://wwws.warnerbros.co.jp/L-moviett0912597jaL: change the WorLd\"The human whose name is written in the Death ...8.271179...1/31/08129.0[{'iso_639_1': 'ja', 'name': '日本語'}]ReleasedThe final death note showdown!L: Change the World[{'id': 9663, 'name': 'sequel'}][{'cast_id': 1, 'character': 'Watari', 'credit...[{'credit_id': '52fe43e1c3a368484e0039a7', 'de...30231200
16451646NaN0[{'id': 18, 'name': 'Drama'}, {'id': 10749, 'n...NaNtt0117364enThe Portrait of a LadyMs. Isabel Archer isn't afraid to challenge so...4.139050...10/18/96144.0[{'iso_639_1': 'it', 'name': 'Italiano'}, {'is...ReleasedBased on the Novel by Henry James.The Portrait of a Lady[{'id': 1295, 'name': 'europe'}, {'id': 6054, ...[{'cast_id': 1, 'character': 'Isabel Archer', ...[{'credit_id': '5762f0fec3a3684b6700003c', 'de...3692836
24632464NaN0[{'id': 18, 'name': 'Drama'}]NaNtt4428814frLa Loi du marchéAt the age of 51 and after 20 months on unempl...4.778045...3/16/1593.0[{'iso_639_1': 'fr', 'name': 'Français'}]ReleasedNaNThe Measure of a Man[{'id': 156052, 'name': 'unemployment'}][{'cast_id': 1, 'character': 'Thierry Taugourd...[{'credit_id': '553b87649251412c27001037', 'de...106498
16511652[{'id': 269098, 'name': 'Police Story Collecti...0[{'id': 28, 'name': 'Action'}, {'id': 12, 'nam...NaNtt0116704cn警察故事4之簡單任務Jackie Chan reprises his role as Chan Ka-Kui (...9.206356...2/10/96107.0[{'iso_639_1': 'en', 'name': 'English'}, {'iso...ReleasedThey thought they possessed the ultimate weapo...First Strike[{'id': 720, 'name': 'helicopter'}, {'id': 779...[{'cast_id': 1, 'character': 'Insp. Chan Ka Ku...[{'credit_id': '52fe44f3c3a36847f80b3b93', 'de...21890845
17011702NaN0[{'id': 99, 'name': 'Documentary'}]NaNtt3013258frSur le chemin de l'écoleThese children live in the four corners of the...1.784682...9/25/1377.0[{'iso_639_1': 'fr', 'name': 'Français'}]ReleasedNaNOn the Way to SchoolNaN[{'cast_id': 5, 'character': 'Elle-m√™me', 'cr...[{'credit_id': '52fe4de89251416c7514504d', 'de...7424
17001701NaN0[{'id': 18, 'name': 'Drama'}]NaNtt0104756enLorenzo's OilLorenzo Odone was a normal child until the age...9.968395...12/30/92135.0[{'iso_639_1': 'en', 'name': 'English'}, {'iso...ReleasedSome people make their own miracles.Lorenzo's Oil[{'id': 970, 'name': 'parent child relationshi...[{'cast_id': 13, 'character': 'Michaela Odone'...[{'credit_id': '52fe432bc3a36847f803fa9f', 'de...7286388
16971698NaN0[{'id': 18, 'name': 'Drama'}, {'id': 10749, 'n...NaNtt0105391enShining ThroughSpirited New Yorker Linda Voss goes to work fo...5.860319...1/31/92132.0[{'iso_639_1': 'en', 'name': 'English'}]ReleasedHe needed to trust her with his secret. She ha...Shining Through[{'id': 74, 'name': 'germany'}, {'id': 818, 'n...[{'cast_id': 2, 'character': 'Ed Leland', 'cre...[{'credit_id': '58fd30ef925141643c02713f', 'de...21621000
24442445NaN0[{'id': 35, 'name': 'Comedy'}, {'id': 10402, '...NaNtt0086112enThe Pirates of PenzanceIn spite of being apprenticed to a Pirate King...2.555585...2/18/83112.0[{'iso_639_1': 'en', 'name': 'English'}]ReleasedNaNThe Pirates of PenzanceNaN[{'cast_id': 1, 'character': 'The Pirate King'...[{'credit_id': '57cf74b1c3a3684f230043e3', 'de...694497
16961697NaN0[{'id': 28, 'name': 'Action'}, {'id': 53, 'nam...NaNtt0100263frNikitaA beautiful felon, sentenced to life in prison...6.586401...2/21/90115.0[{'iso_639_1': 'en', 'name': 'English'}, {'iso...ReleasedShe murders. So she can live.La Femme Nikita[{'id': 1308, 'name': 'secret identity'}, {'id...[{'cast_id': 1, 'character': 'Nikita', 'credit...[{'credit_id': '52fe44e7c3a36847f80b0f97', 'de...5000000
16901691NaN0[{'id': 28, 'name': 'Action'}]https://www.facebook.com/DeadLandsMoviett3399916enThe Dead LandsHongi, a Maori chieftain’s teenage son, must...10.001517...9/4/14108.0[{'iso_639_1': 'mi', 'name': ''}]ReleasedNaNThe Dead Lands[{'id': 2658, 'name': 'new zealand'}, {'id': 2...[{'cast_id': 2, 'character': 'Hongi', 'credit_...[{'credit_id': '53cf9579c3a368776d00734c', 'de...5240
16891690NaN0[{'id': 35, 'name': 'Comedy'}, {'id': 80, 'nam...NaNtt0069442frUne belle fille comme moiStanislas Previne is a young sociologist, prep...0.326486...9/13/7298.0[{'iso_639_1': 'fr', 'name': 'Français'}]ReleasedNaNA Gorgeous Girl Like Me[{'id': 4434, 'name': 'interview'}, {'id': 119...[{'cast_id': 6, 'character': 'Camille Bliss', ...[{'credit_id': '52fe43d3c3a36847f8072785', 'de...457163
166167[{'id': 173344, 'name': 'Qatsi Collection', 'p...0[{'id': 99, 'name': 'Documentary'}, {'id': 104...NaNtt0085809enKoyaanisqatsiTakes us to locations all around the US and sh...8.802715...6/6/8287.0[{'iso_639_1': 'en', 'name': 'English'}]ReleasedUntil now, you've never really seen the world ...Koyaanisqatsi[{'id': 1576, 'name': 'technology'}, {'id': 21...[{'cast_id': 10, 'character': 'Himself (uncred...[{'credit_id': '52fe44249251416c7502a35b', 'de...1723872
165166NaN0[{'id': 14, 'name': 'Fantasy'}, {'id': 28, 'na...NaNtt0099656enGhosts Can't Do ItElderly Scott kills himself after a heart atta...0.836108...1/1/8990.0[{'iso_639_1': 'en', 'name': 'English'}]ReleasedOnly her desire can make him rise againGhosts Can't Do ItNaN[{'cast_id': 1, 'character': \"Katie O'Dare Sco...[{'credit_id': '52fe44709251416c91010a89', 'de...25000
395396NaN0[{'id': 35, 'name': 'Comedy'}, {'id': 18, 'nam...http://greatbuckhowardmovie.com/tt0460810enThe Great Buck HowardWhen a law school dropout answers an advertise...7.957389...1/18/0887.0[{'iso_639_1': 'en', 'name': 'English'}]ReleasedGreatness is a state of mind.The Great Buck Howard[{'id': 2594, 'name': 'magic show'}, {'id': 36...[{'cast_id': 2, 'character': 'Buck Howard', 'c...[{'credit_id': '54ad745f9251417538000081', 'de...900689
24512452NaN0[{'id': 35, 'name': 'Comedy'}, {'id': 80, 'nam...NaNtt2402105enDom HemingwayAfter spending 12 years in prison for keeping ...5.713974...11/15/1393.0[{'iso_639_1': 'en', 'name': 'English'}]ReleasedI want my money plus interest.... and a presentDom Hemingway[{'id': 9825, 'name': 'growing up'}, {'id': 10...[{'cast_id': 2, 'character': 'Dom Hemingway', ...[{'credit_id': '548d71c592514122f90040b1', 'de...523511
16871688NaN0[{'id': 18, 'name': 'Drama'}]NaNtt0106438enBlueAgainst a plain, unchanging blue screen, a den...1.437622...8/19/9379.0[{'iso_639_1': 'en', 'name': 'English'}]ReleasedNaNBlue[{'id': 237, 'name': 'gay'}, {'id': 496, 'name...[{'cast_id': 2, 'character': '', 'credit_id': ...[{'credit_id': '53cb0c80c3a3687775000626', 'de...1718328
396397NaN0[{'id': 35, 'name': 'Comedy'}, {'id': 99, 'nam...http://kevinhartnation.com/portfolio/let-me-ex...tt2609912enKevin Hart: Let Me ExplainCaptures the laughter, energy and mayhem from ...11.110533...7/3/1374.0[{'iso_639_1': 'en', 'name': 'English'}]ReleasedWitness the rise of a legendKevin Hart: Let Me Explain[{'id': 9716, 'name': 'stand-up comedy'}, {'id...[{'cast_id': 2, 'character': 'Himself', 'credi...[{'credit_id': '5527585c92514115260005ef', 'de...32230907
16831684NaN0[{'id': 18, 'name': 'Drama'}, {'id': 10749, 'n...NaNtt0090036enThe Slugger's WifeDarryl Palmer (Michael O'Keefe) is a major lea...0.000308...3/29/85105.0[{'iso_639_1': 'en', 'name': 'English'}]ReleasedNaNThe Slugger's WifeNaN[{'cast_id': 3, 'character': 'Darryl Palmer', ...[{'credit_id': '52fe499ac3a368484e13425b', 'de...1878561
161162NaN0[{'id': 35, 'name': 'Comedy'}, {'id': 10749, '...NaNtt0094846enCasual Sex?Two girls go away to a holiday resort looking ...1.319188...4/22/8888.0[{'iso_639_1': 'en', 'name': 'English'}]ReleasedStacy and Melissa are in search for the world'...Casual Sex?[{'id': 65, 'name': 'holiday'}, {'id': 572, 'n...[{'cast_id': 1, 'character': 'Stacy', 'credit_...[{'credit_id': '52fe456f9251416c910322d7', 'de...12277096
397398NaN0[{'id': 18, 'name': 'Drama'}, {'id': 27, 'name...NaNtt0097100enCommunionA novelist's wife and son see him changed by a...2.628306...11/10/89103.0[{'iso_639_1': 'en', 'name': 'English'}]ReleasedThe true story of one man's terrifying journey...Communion[{'id': 255, 'name': 'male nudity'}, {'id': 81...[{'cast_id': 4, 'character': 'Whitley Strieber...[{'credit_id': '573a39e09251415571000a4c', 'de...1919653
24572458NaN0[{'id': 18, 'name': 'Drama'}, {'id': 10749, 'n...NaNtt1595656enTo the WonderAfter falling in love in Paris, Marina and Nei...8.452485...2/22/13112.0[{'iso_639_1': 'en', 'name': 'English'}]ReleasedNaNTo the WonderNaN[{'cast_id': 4, 'character': 'Neil', 'credit_i...[{'credit_id': '5638a34a925141285701f217', 'de...587615
24582459NaN0[{'id': 18, 'name': 'Drama'}, {'id': 14, 'name...NaNtt3174376enBefore I WakeAbout an orphaned child whose dreams - and nig...9.449103...4/7/1697.0[{'iso_639_1': 'en', 'name': 'English'}]ReleasedFear your dreams.Before I Wake[{'id': 1566, 'name': 'dream'}, {'id': 3030, '...[{'cast_id': 1, 'character': 'Jessie', 'credit...[{'credit_id': '547b481f9251412d7f00007a', 'de...3295624
16671668NaN0[{'id': 53, 'name': 'Thriller'}, {'id': 9648, ...NaNtt0041959enThe Third ManSet in postwar Vienna, Austria, \"The Third Man...8.513889...8/31/49104.0[{'iso_639_1': 'en', 'name': 'English'}, {'iso...ReleasedHunted by men ... Sought by WOMEN!The Third Man[{'id': 1201, 'name': 'austria'}, {'id': 1227,...[{'cast_id': 8, 'character': 'Holly Martins', ...[{'credit_id': '58d8fdf5c3a368121b065001', 'de...596349
16581659NaN0[{'id': 35, 'name': 'Comedy'}, {'id': 18, 'nam...http://www.kabluey.com/tt0816545enKablueyLeslie is left with few options when her husba...1.773963...6/25/0786.0[{'iso_639_1': 'en', 'name': 'English'}]ReleasedEvery family has a black sheep. This one is blue.Kabluey[{'id': 6732, 'name': 'mascot'}, {'id': 8438, ...[{'cast_id': 1, 'character': 'Leslie', 'credit...[{'credit_id': '52fe466f9251416c75078339', 'de...83398
16541655NaN0[{'id': 28, 'name': 'Action'}, {'id': 35, 'nam...http://jakal.co.kr/index.htmtt2652756ko자칼이 온다Clumsy and off-the-wall hit man Bong Min-jung ...1.322781...11/15/12107.0[{'iso_639_1': 'ko', 'name': '한국어/조선말'}]ReleasedNaNCode Name: JackalNaN[{'cast_id': 1, 'character': 'Bong Min-jung', ...[{'credit_id': '52fe4c089251416c910ed7a3', 'de...1261947
18211822NaN0[{'id': 99, 'name': 'Documentary'}]NaNtt0321376esBalserosThe story of Cuban refugees who risked their l...0.525869...4/12/02120.0[{'iso_639_1': 'es', 'name': 'Español'}]ReleasedNaNCuban Rafters[{'id': 701, 'name': 'cuba'}, {'id': 8158, 'na...[{'cast_id': 5, 'character': 'Herself (archive...[{'credit_id': '52fe456dc3a368484e05a2c3', 'de...62153
\n", "

3000 rows × 23 columns

\n", "
" ], "text/plain": [ " id belongs_to_collection budget \\\n", "2135 2136 [{'id': 295, 'name': 'Pirates of the Caribbean... 380000000 \n", "2209 2210 [{'id': 295, 'name': 'Pirates of the Caribbean... 300000000 \n", "2770 2771 [{'id': 86311, 'name': 'The Avengers Collectio... 280000000 \n", "1425 1426 [{'id': 8537, 'name': 'Superman Collection', '... 270000000 \n", "1270 1271 NaN 260000000 \n", "1630 1631 [{'id': 8650, 'name': 'Transformers Collection... 260000000 \n", "2532 2533 [{'id': 121938, 'name': 'The Hobbit Collection... 250000000 \n", "961 962 [{'id': 1241, 'name': 'Harry Potter Collection... 250000000 \n", "906 907 [{'id': 263, 'name': 'The Dark Knight Collecti... 250000000 \n", "666 667 [{'id': 121938, 'name': 'The Hobbit Collection... 250000000 \n", "2737 2738 [{'id': 645, 'name': 'James Bond Collection', ... 245000000 \n", "1249 1250 [{'id': 420, 'name': 'The Chronicles of Narnia... 225000000 \n", "2357 2358 [{'id': 209131, 'name': 'Man of Steel Collecti... 225000000 \n", "1126 1127 [{'id': 86311, 'name': 'The Avengers Collectio... 220000000 \n", "1880 1881 [{'id': 748, 'name': 'X-Men Collection', 'post... 210000000 \n", "2035 2036 NaN 207000000 \n", "1222 1223 [{'id': 87118, 'name': 'Cars Collection', 'pos... 200000000 \n", "763 764 [{'id': 416468, 'name': 'World War Z Collectio... 200000000 \n", "1811 1812 [{'id': 528, 'name': 'The Terminator Collectio... 200000000 \n", "881 882 [{'id': 261307, 'name': 'Alice in Wonderland C... 200000000 \n", "2562 2563 [{'id': 137697, 'name': 'Finding Nemo Collecti... 200000000 \n", "1838 1839 [{'id': 645, 'name': 'James Bond Collection', ... 200000000 \n", "2126 2127 [{'id': 284433, 'name': 'Guardians of the Gala... 200000000 \n", "2623 2624 NaN 197471676 \n", "927 928 NaN 195000000 \n", "2322 2323 [{'id': 8650, 'name': 'Transformers Collection... 195000000 \n", "1394 1395 NaN 190000000 \n", "1761 1762 [{'id': 9485, 'name': 'The Fast and the Furiou... 
190000000 \n", "1673 1674 [{'id': 263, 'name': 'The Dark Knight Collecti... 185000000 \n", "902 903 [{'id': 84, 'name': 'Indiana Jones Collection'... 185000000 \n", "... ... ... ... \n", "2484 2485 [{'id': 10457, 'name': 'Once Upon a Time in Ch... 0 \n", "152 153 NaN 0 \n", "151 152 NaN 0 \n", "1595 1596 NaN 0 \n", "1638 1639 [{'id': 102019, 'name': 'Death Note Collection... 0 \n", "1645 1646 NaN 0 \n", "2463 2464 NaN 0 \n", "1651 1652 [{'id': 269098, 'name': 'Police Story Collecti... 0 \n", "1701 1702 NaN 0 \n", "1700 1701 NaN 0 \n", "1697 1698 NaN 0 \n", "2444 2445 NaN 0 \n", "1696 1697 NaN 0 \n", "1690 1691 NaN 0 \n", "1689 1690 NaN 0 \n", "166 167 [{'id': 173344, 'name': 'Qatsi Collection', 'p... 0 \n", "165 166 NaN 0 \n", "395 396 NaN 0 \n", "2451 2452 NaN 0 \n", "1687 1688 NaN 0 \n", "396 397 NaN 0 \n", "1683 1684 NaN 0 \n", "161 162 NaN 0 \n", "397 398 NaN 0 \n", "2457 2458 NaN 0 \n", "2458 2459 NaN 0 \n", "1667 1668 NaN 0 \n", "1658 1659 NaN 0 \n", "1654 1655 NaN 0 \n", "1821 1822 NaN 0 \n", "\n", " genres \\\n", "2135 [{'id': 12, 'name': 'Adventure'}, {'id': 28, '... \n", "2209 [{'id': 12, 'name': 'Adventure'}, {'id': 14, '... \n", "2770 [{'id': 28, 'name': 'Action'}, {'id': 12, 'nam... \n", "1425 [{'id': 12, 'name': 'Adventure'}, {'id': 14, '... \n", "1270 [{'id': 28, 'name': 'Action'}, {'id': 12, 'nam... \n", "1630 [{'id': 28, 'name': 'Action'}, {'id': 878, 'na... \n", "2532 [{'id': 12, 'name': 'Adventure'}, {'id': 14, '... \n", "961 [{'id': 12, 'name': 'Adventure'}, {'id': 14, '... \n", "906 [{'id': 28, 'name': 'Action'}, {'id': 80, 'nam... \n", "666 [{'id': 12, 'name': 'Adventure'}, {'id': 14, '... \n", "2737 [{'id': 28, 'name': 'Action'}, {'id': 12, 'nam... \n", "1249 [{'id': 12, 'name': 'Adventure'}, {'id': 10751... \n", "2357 [{'id': 28, 'name': 'Action'}, {'id': 12, 'nam... \n", "1126 [{'id': 878, 'name': 'Science Fiction'}, {'id'... \n", "1880 [{'id': 12, 'name': 'Adventure'}, {'id': 28, '... 
\n", "2035 [{'id': 12, 'name': 'Adventure'}, {'id': 18, '... \n", "1222 [{'id': 16, 'name': 'Animation'}, {'id': 10751... \n", "763 [{'id': 28, 'name': 'Action'}, {'id': 18, 'nam... \n", "1811 [{'id': 28, 'name': 'Action'}, {'id': 53, 'nam... \n", "881 [{'id': 10751, 'name': 'Family'}, {'id': 14, '... \n", "2562 [{'id': 12, 'name': 'Adventure'}, {'id': 16, '... \n", "1838 [{'id': 12, 'name': 'Adventure'}, {'id': 28, '... \n", "2126 [{'id': 28, 'name': 'Action'}, {'id': 12, 'nam... \n", "2623 [{'id': 12, 'name': 'Adventure'}, {'id': 878, ... \n", "927 [{'id': 28, 'name': 'Action'}, {'id': 10751, '... \n", "2322 [{'id': 28, 'name': 'Action'}, {'id': 878, 'na... \n", "1394 [{'id': 12, 'name': 'Adventure'}, {'id': 10751... \n", "1761 [{'id': 28, 'name': 'Action'}] \n", "1673 [{'id': 18, 'name': 'Drama'}, {'id': 28, 'name... \n", "902 [{'id': 12, 'name': 'Adventure'}, {'id': 28, '... \n", "... ... \n", "2484 [{'id': 28, 'name': 'Action'}, {'id': 35, 'nam... \n", "152 [{'id': 18, 'name': 'Drama'}, {'id': 14, 'name... \n", "151 [{'id': 99, 'name': 'Documentary'}] \n", "1595 [{'id': 53, 'name': 'Thriller'}, {'id': 878, '... \n", "1638 [{'id': 14, 'name': 'Fantasy'}, {'id': 28, 'na... \n", "1645 [{'id': 18, 'name': 'Drama'}, {'id': 10749, 'n... \n", "2463 [{'id': 18, 'name': 'Drama'}] \n", "1651 [{'id': 28, 'name': 'Action'}, {'id': 12, 'nam... \n", "1701 [{'id': 99, 'name': 'Documentary'}] \n", "1700 [{'id': 18, 'name': 'Drama'}] \n", "1697 [{'id': 18, 'name': 'Drama'}, {'id': 10749, 'n... \n", "2444 [{'id': 35, 'name': 'Comedy'}, {'id': 10402, '... \n", "1696 [{'id': 28, 'name': 'Action'}, {'id': 53, 'nam... \n", "1690 [{'id': 28, 'name': 'Action'}] \n", "1689 [{'id': 35, 'name': 'Comedy'}, {'id': 80, 'nam... \n", "166 [{'id': 99, 'name': 'Documentary'}, {'id': 104... \n", "165 [{'id': 14, 'name': 'Fantasy'}, {'id': 28, 'na... \n", "395 [{'id': 35, 'name': 'Comedy'}, {'id': 18, 'nam... \n", "2451 [{'id': 35, 'name': 'Comedy'}, {'id': 80, 'nam... 
\n", "1687 [{'id': 18, 'name': 'Drama'}] \n", "396 [{'id': 35, 'name': 'Comedy'}, {'id': 99, 'nam... \n", "1683 [{'id': 18, 'name': 'Drama'}, {'id': 10749, 'n... \n", "161 [{'id': 35, 'name': 'Comedy'}, {'id': 10749, '... \n", "397 [{'id': 18, 'name': 'Drama'}, {'id': 27, 'name... \n", "2457 [{'id': 18, 'name': 'Drama'}, {'id': 10749, 'n... \n", "2458 [{'id': 18, 'name': 'Drama'}, {'id': 14, 'name... \n", "1667 [{'id': 53, 'name': 'Thriller'}, {'id': 9648, ... \n", "1658 [{'id': 35, 'name': 'Comedy'}, {'id': 18, 'nam... \n", "1654 [{'id': 28, 'name': 'Action'}, {'id': 35, 'nam... \n", "1821 [{'id': 99, 'name': 'Documentary'}] \n", "\n", " homepage imdb_id \\\n", "2135 http://disney.go.com/pirates/index-on-stranger... tt1298650 \n", "2209 http://disney.go.com/disneypictures/pirates/ tt0449088 \n", "2770 http://marvel.com/movies/movie/193/avengers_ag... tt2395427 \n", "1425 http://www.superman.com tt0348150 \n", "1270 http://movies.disney.com/john-carter tt0401729 \n", "1630 http://www.transformersmovie.com/ tt3371366 \n", "2532 http://www.thehobbit.com/ tt0903624 \n", "961 http://harrypotter.warnerbros.com/harrypottera... tt0417741 \n", "906 http://www.thedarkknightrises.com/ tt1345836 \n", "666 http://www.thehobbit.com/ tt1170358 \n", "2737 http://www.sonypictures.com/movies/spectre/ tt2379713 \n", "1249 NaN tt0499448 \n", "2357 http://www.manofsteel.com/ tt0770828 \n", "1126 http://marvel.com/avengers_movie/ tt0848228 \n", "1880 NaN tt0376994 \n", "2035 NaN tt0360717 \n", "1222 http://www.disney.go.com/cars/ tt1216475 \n", "763 http://www.worldwarzmovie.com tt0816711 \n", "1811 NaN tt0181852 \n", "881 http://disney.go.com/wonderland/ tt1014759 \n", "2562 http://movies.disney.com/finding-dory tt2277860 \n", "1838 http://www.mgm.com/view/movie/234/Quantum-of-S... tt0830515 \n", "2126 http://marvel.com/movies/movie/221/guardians_o... 
tt3896198 \n", "2623 NaN tt2239822 \n", "927 http://jackthegiantkiller.warnerbros.com tt1351685 \n", "2322 http://www.transformersmovie.com/ tt1399103 \n", "1394 http://movies.disney.com/tomorrowland tt1964418 \n", "1761 http://www.furious7.com/ tt2820852 \n", "1673 http://thedarkknight.warnerbros.com/dvdsite/ tt0468569 \n", "902 http://www.indianajones.com/site/index.html tt0367882 \n", "... ... ... \n", "2484 NaN tt0105839 \n", "152 NaN tt0098061 \n", "151 NaN tt5278464 \n", "1595 NaN tt2866360 \n", "1638 http://wwws.warnerbros.co.jp/L-movie tt0912597 \n", "1645 NaN tt0117364 \n", "2463 NaN tt4428814 \n", "1651 NaN tt0116704 \n", "1701 NaN tt3013258 \n", "1700 NaN tt0104756 \n", "1697 NaN tt0105391 \n", "2444 NaN tt0086112 \n", "1696 NaN tt0100263 \n", "1690 https://www.facebook.com/DeadLandsMovie tt3399916 \n", "1689 NaN tt0069442 \n", "166 NaN tt0085809 \n", "165 NaN tt0099656 \n", "395 http://greatbuckhowardmovie.com/ tt0460810 \n", "2451 NaN tt2402105 \n", "1687 NaN tt0106438 \n", "396 http://kevinhartnation.com/portfolio/let-me-ex... 
tt2609912 \n", "1683 NaN tt0090036 \n", "161 NaN tt0094846 \n", "397 NaN tt0097100 \n", "2457 NaN tt1595656 \n", "2458 NaN tt3174376 \n", "1667 NaN tt0041959 \n", "1658 http://www.kabluey.com/ tt0816545 \n", "1654 http://jakal.co.kr/index.htm tt2652756 \n", "1821 NaN tt0321376 \n", "\n", " original_language original_title \\\n", "2135 en Pirates of the Caribbean: On Stranger Tides \n", "2209 en Pirates of the Caribbean: At World's End \n", "2770 en Avengers: Age of Ultron \n", "1425 en Superman Returns \n", "1270 en John Carter \n", "1630 en Transformers: The Last Knight \n", "2532 en The Hobbit: An Unexpected Journey \n", "961 en Harry Potter and the Half-Blood Prince \n", "906 en The Dark Knight Rises \n", "666 en The Hobbit: The Desolation of Smaug \n", "2737 en Spectre \n", "1249 en The Chronicles of Narnia: Prince Caspian \n", "2357 en Man of Steel \n", "1126 en The Avengers \n", "1880 en X-Men: The Last Stand \n", "2035 en King Kong \n", "1222 en Cars 2 \n", "763 en World War Z \n", "1811 en Terminator 3: Rise of the Machines \n", "881 en Alice in Wonderland \n", "2562 en Finding Dory \n", "1838 en Quantum of Solace \n", "2126 en Guardians of the Galaxy Vol. 2 \n", "2623 en Valerian and the City of a Thousand Planets \n", "927 en Jack the Giant Slayer \n", "2322 en Transformers: Dark of the Moon \n", "1394 en Tomorrowland \n", "1761 en Furious 7 \n", "1673 en The Dark Knight \n", "902 en Indiana Jones and the Kingdom of the Crystal S... \n", "... ... ... 
\n", "2484 cn 黃飛鴻之二男兒當自強 \n", "152 en Paperhouse \n", "151 en Holy Hell \n", "1595 en Coherence \n", "1638 ja L: change the WorLd \n", "1645 en The Portrait of a Lady \n", "2463 fr La Loi du marché \n", "1651 cn 警察故事4之簡單任務 \n", "1701 fr Sur le chemin de l'école \n", "1700 en Lorenzo's Oil \n", "1697 en Shining Through \n", "2444 en The Pirates of Penzance \n", "1696 fr Nikita \n", "1690 en The Dead Lands \n", "1689 fr Une belle fille comme moi \n", "166 en Koyaanisqatsi \n", "165 en Ghosts Can't Do It \n", "395 en The Great Buck Howard \n", "2451 en Dom Hemingway \n", "1687 en Blue \n", "396 en Kevin Hart: Let Me Explain \n", "1683 en The Slugger's Wife \n", "161 en Casual Sex? \n", "397 en Communion \n", "2457 en To the Wonder \n", "2458 en Before I Wake \n", "1667 en The Third Man \n", "1658 en Kabluey \n", "1654 ko 자칼이 온다 \n", "1821 es Balseros \n", "\n", " overview popularity \\\n", "2135 Captain Jack Sparrow crosses paths with a woma... 27.887720 \n", "2209 Captain Barbossa, long believed to be dead, ha... 31.363664 \n", "2770 When Tony Stark tries to jumpstart a dormant p... 37.379420 \n", "1425 Superman returns to discover his 5-year absenc... 13.284712 \n", "1270 John Carter is a war-weary, former military ca... 14.670353 \n", "1630 Autobots and Decepticons are at war, with huma... 39.186819 \n", "2532 Bilbo Baggins, a hobbit enjoying his quiet lif... 23.253089 \n", "961 As Harry begins his sixth year at Hogwarts, he... 19.083723 \n", "906 Following the death of District Attorney Harve... 20.582580 \n", "666 The Dwarves, Bilbo and Gandalf have successful... 20.644776 \n", "2737 A cryptic message from Bond‚Äôs past sends him... 24.926577 \n", "1249 One year after their incredible adventures in ... 12.490891 \n", "2357 A young boy learns that he has extraordinary p... 18.538834 \n", "1126 When an unexpected enemy emerges and threatens... 89.887648 \n", "1880 When a cure is found to treat mutations, lines... 
0.867943 \n", "2035 In 1933 New York, an overly ambitious movie pr... 19.761164 \n", "1222 Star race car Lightning McQueen and his pal Ma... 13.693002 \n", "763 Life for former United Nations investigator Ge... 26.114917 \n", "1811 It's been 10 years since John Connor saved Ear... 20.818907 \n", "881 Alice, an unpretentious and individual 19-year... 17.285093 \n", "2562 Dory is reunited with her friends Nemo and Mar... 14.477677 \n", "1838 Quantum of Solace continues the adventures of ... 13.678120 \n", "2126 The Guardians must fight to keep their newfoun... 185.330992 \n", "2623 In the 28th century, Valerian and Laureline ar... 15.262706 \n", "927 The story of an ancient war that is reignited ... 11.606061 \n", "2322 Sam Witwicky takes his first tenuous steps int... 4.503505 \n", "1394 Bound by a shared destiny, a bright, optimisti... 22.296076 \n", "1761 Deckard Shaw seeks revenge against Dominic Tor... 27.275687 \n", "1673 Batman raises the stakes in his war on crime. ... 123.167259 \n", "902 Set during the Cold War, the Soviets ‚Äì led b... 12.577266 \n", "... ... ... \n", "2484 In the sequel to the Tsui Hark classic, Wong F... 3.606688 \n", "152 Anna is becoming lost in the loneliness of her... 3.520171 \n", "151 An inside look at a West Hollywood cult formed... 1.279443 \n", "1595 On the night a comet is passing near Earth, ei... 6.851243 \n", "1638 \"The human whose name is written in the Death ... 8.271179 \n", "1645 Ms. Isabel Archer isn't afraid to challenge so... 4.139050 \n", "2463 At the age of 51 and after 20 months on unempl... 4.778045 \n", "1651 Jackie Chan reprises his role as Chan Ka-Kui (... 9.206356 \n", "1701 These children live in the four corners of the... 1.784682 \n", "1700 Lorenzo Odone was a normal child until the age... 9.968395 \n", "1697 Spirited New Yorker Linda Voss goes to work fo... 5.860319 \n", "2444 In spite of being apprenticed to a Pirate King... 2.555585 \n", "1696 A beautiful felon, sentenced to life in prison... 
6.586401 \n", "1690 Hongi, a Maori chieftain‚Äôs teenage son, must... 10.001517 \n", "1689 Stanislas Previne is a young sociologist, prep... 0.326486 \n", "166 Takes us to locations all around the US and sh... 8.802715 \n", "165 Elderly Scott kills himself after a heart atta... 0.836108 \n", "395 When a law school dropout answers an advertise... 7.957389 \n", "2451 After spending 12 years in prison for keeping ... 5.713974 \n", "1687 Against a plain, unchanging blue screen, a den... 1.437622 \n", "396 Captures the laughter, energy and mayhem from ... 11.110533 \n", "1683 Darryl Palmer (Michael O'Keefe) is a major lea... 0.000308 \n", "161 Two girls go away to a holiday resort looking ... 1.319188 \n", "397 A novelist's wife and son see him changed by a... 2.628306 \n", "2457 After falling in love in Paris, Marina and Nei... 8.452485 \n", "2458 About an orphaned child whose dreams - and nig... 9.449103 \n", "1667 Set in postwar Vienna, Austria, \"The Third Man... 8.513889 \n", "1658 Leslie is left with few options when her husba... 1.773963 \n", "1654 Clumsy and off-the-wall hit man Bong Min-jung ... 1.322781 \n", "1821 The story of Cuban refugees who risked their l... 0.525869 \n", "\n", " ... release_date runtime \\\n", "2135 ... 5/14/11 136.0 \n", "2209 ... 5/19/07 169.0 \n", "2770 ... 4/22/15 141.0 \n", "1425 ... 6/28/06 154.0 \n", "1270 ... 3/7/12 132.0 \n", "1630 ... 6/21/17 149.0 \n", "2532 ... 11/26/12 169.0 \n", "961 ... 7/7/09 153.0 \n", "906 ... 7/16/12 165.0 \n", "666 ... 12/11/13 161.0 \n", "2737 ... 10/26/15 148.0 \n", "1249 ... 5/15/08 150.0 \n", "2357 ... 6/12/13 143.0 \n", "1126 ... 4/25/12 143.0 \n", "1880 ... 5/24/06 104.0 \n", "2035 ... 12/14/05 187.0 \n", "1222 ... 6/11/11 106.0 \n", "763 ... 6/20/13 116.0 \n", "1811 ... 7/2/03 109.0 \n", "881 ... 3/3/10 108.0 \n", "2562 ... 6/16/16 97.0 \n", "1838 ... 10/30/08 106.0 \n", "2126 ... 4/19/17 137.0 \n", "2623 ... 7/20/17 137.0 \n", "927 ... 2/27/13 114.0 \n", "2322 ... 6/28/11 154.0 \n", "1394 ... 
5/19/15 130.0 \n", "1761 ... 4/1/15 137.0 \n", "1673 ... 7/16/08 152.0 \n", "902 ... 5/21/08 122.0 \n", "... ... ... ... \n", "2484 ... 9/16/92 113.0 \n", "152 ... 1/1/88 92.0 \n", "151 ... 1/25/16 100.0 \n", "1595 ... 9/19/13 89.0 \n", "1638 ... 1/31/08 129.0 \n", "1645 ... 10/18/96 144.0 \n", "2463 ... 3/16/15 93.0 \n", "1651 ... 2/10/96 107.0 \n", "1701 ... 9/25/13 77.0 \n", "1700 ... 12/30/92 135.0 \n", "1697 ... 1/31/92 132.0 \n", "2444 ... 2/18/83 112.0 \n", "1696 ... 2/21/90 115.0 \n", "1690 ... 9/4/14 108.0 \n", "1689 ... 9/13/72 98.0 \n", "166 ... 6/6/82 87.0 \n", "165 ... 1/1/89 90.0 \n", "395 ... 1/18/08 87.0 \n", "2451 ... 11/15/13 93.0 \n", "1687 ... 8/19/93 79.0 \n", "396 ... 7/3/13 74.0 \n", "1683 ... 3/29/85 105.0 \n", "161 ... 4/22/88 88.0 \n", "397 ... 11/10/89 103.0 \n", "2457 ... 2/22/13 112.0 \n", "2458 ... 4/7/16 97.0 \n", "1667 ... 8/31/49 104.0 \n", "1658 ... 6/25/07 86.0 \n", "1654 ... 11/15/12 107.0 \n", "1821 ... 4/12/02 120.0 \n", "\n", " spoken_languages status \\\n", "2135 [{'iso_639_1': 'en', 'name': 'English'}, {'iso... Released \n", "2209 [{'iso_639_1': 'en', 'name': 'English'}] Released \n", "2770 [{'iso_639_1': 'en', 'name': 'English'}] Released \n", "1425 [{'iso_639_1': 'en', 'name': 'English'}, {'iso... Released \n", "1270 [{'iso_639_1': 'en', 'name': 'English'}] Released \n", "1630 [{'iso_639_1': 'en', 'name': 'English'}] Released \n", "2532 [{'iso_639_1': 'en', 'name': 'English'}] Released \n", "961 [{'iso_639_1': 'en', 'name': 'English'}] Released \n", "906 [{'iso_639_1': 'en', 'name': 'English'}] Released \n", "666 [{'iso_639_1': 'en', 'name': 'English'}] Released \n", "2737 [{'iso_639_1': 'fr', 'name': 'Français'}, {'is... 
Released \n", "1249 [{'iso_639_1': 'en', 'name': 'English'}] Released \n", "2357 [{'iso_639_1': 'en', 'name': 'English'}] Released \n", "1126 [{'iso_639_1': 'en', 'name': 'English'}] Released \n", "1880 [{'iso_639_1': 'en', 'name': 'English'}] Released \n", "2035 [{'iso_639_1': 'en', 'name': 'English'}] Released \n", "1222 [{'iso_639_1': 'en', 'name': 'English'}, {'iso... Released \n", "763 [{'iso_639_1': 'en', 'name': 'English'}] Released \n", "1811 [{'iso_639_1': 'en', 'name': 'English'}] Released \n", "881 [{'iso_639_1': 'en', 'name': 'English'}] Released \n", "2562 [{'iso_639_1': 'en', 'name': 'English'}] Released \n", "1838 [{'iso_639_1': 'en', 'name': 'English'}, {'iso... Released \n", "2126 [{'iso_639_1': 'en', 'name': 'English'}] Released \n", "2623 [{'iso_639_1': 'fr', 'name': 'Français'}, {'is... Released \n", "927 [{'iso_639_1': 'en', 'name': 'English'}] Released \n", "2322 [{'iso_639_1': 'en', 'name': 'English'}] Released \n", "1394 [{'iso_639_1': 'en', 'name': 'English'}] Released \n", "1761 [{'iso_639_1': 'en', 'name': 'English'}] Released \n", "1673 [{'iso_639_1': 'en', 'name': 'English'}, {'iso... Released \n", "902 [{'iso_639_1': 'en', 'name': 'English'}, {'iso... Released \n", "... ... ... \n", "2484 [{'iso_639_1': 'cn', 'name': '广州话 / 廣州話'}, {'i... Released \n", "152 [{'iso_639_1': 'en', 'name': 'English'}] Released \n", "151 [{'iso_639_1': 'en', 'name': 'English'}] Released \n", "1595 [{'iso_639_1': 'en', 'name': 'English'}] Released \n", "1638 [{'iso_639_1': 'ja', 'name': '日本語'}] Released \n", "1645 [{'iso_639_1': 'it', 'name': 'Italiano'}, {'is... Released \n", "2463 [{'iso_639_1': 'fr', 'name': 'Français'}] Released \n", "1651 [{'iso_639_1': 'en', 'name': 'English'}, {'iso... Released \n", "1701 [{'iso_639_1': 'fr', 'name': 'Français'}] Released \n", "1700 [{'iso_639_1': 'en', 'name': 'English'}, {'iso... 
Released \n", "1697 [{'iso_639_1': 'en', 'name': 'English'}] Released \n", "2444 [{'iso_639_1': 'en', 'name': 'English'}] Released \n", "1696 [{'iso_639_1': 'en', 'name': 'English'}, {'iso... Released \n", "1690 [{'iso_639_1': 'mi', 'name': ''}] Released \n", "1689 [{'iso_639_1': 'fr', 'name': 'Français'}] Released \n", "166 [{'iso_639_1': 'en', 'name': 'English'}] Released \n", "165 [{'iso_639_1': 'en', 'name': 'English'}] Released \n", "395 [{'iso_639_1': 'en', 'name': 'English'}] Released \n", "2451 [{'iso_639_1': 'en', 'name': 'English'}] Released \n", "1687 [{'iso_639_1': 'en', 'name': 'English'}] Released \n", "396 [{'iso_639_1': 'en', 'name': 'English'}] Released \n", "1683 [{'iso_639_1': 'en', 'name': 'English'}] Released \n", "161 [{'iso_639_1': 'en', 'name': 'English'}] Released \n", "397 [{'iso_639_1': 'en', 'name': 'English'}] Released \n", "2457 [{'iso_639_1': 'en', 'name': 'English'}] Released \n", "2458 [{'iso_639_1': 'en', 'name': 'English'}] Released \n", "1667 [{'iso_639_1': 'en', 'name': 'English'}, {'iso... Released \n", "1658 [{'iso_639_1': 'en', 'name': 'English'}] Released \n", "1654 [{'iso_639_1': 'ko', 'name': '한국어/조선말'}] Released \n", "1821 [{'iso_639_1': 'es', 'name': 'Español'}] Released \n", "\n", " tagline \\\n", "2135 Live Forever Or Die Trying. \n", "2209 At the end of the world, the adventure begins. \n", "2770 A New Age Has Come. \n", "1425 NaN \n", "1270 Lost in our world, found in another. \n", "1630 For one world to live, the other must die. \n", "2532 From the smallest beginnings come the greatest... \n", "961 Dark Secrets Revealed \n", "906 The Legend Ends \n", "666 Beyond darkness... beyond desolation... lies t... \n", "2737 A Plan No One Escapes \n", "1249 Hope has a new face. \n", "2357 You will believe that a man can fly. \n", "1126 Some assembly required. \n", "1880 Take a Stand \n", "2035 The eighth wonder of the world. \n", "1222 Ka-ciao! \n", "763 Remember Philly! \n", "1811 The Machines Will Rise. 
\n", "881 You're invited to a very important date. \n", "2562 An unforgettable journey she probably won't re... \n", "1838 For love, for hate, for justice, for revenge. \n", "2126 Obviously. \n", "2623 NaN \n", "927 Prepare for a giant adventure \n", "2322 The invasion we always feared. An enemy we nev... \n", "1394 Imagine a world where nothing is impossible. \n", "1761 Vengeance Hits Home \n", "1673 Why So Serious? \n", "902 The adventure continues . . . \n", "... ... \n", "2484 NaN \n", "152 A drawing that became a dream. A dream that be... \n", "151 NaN \n", "1595 Nothing is random \n", "1638 The final death note showdown! \n", "1645 Based on the Novel by Henry James. \n", "2463 NaN \n", "1651 They thought they possessed the ultimate weapo... \n", "1701 NaN \n", "1700 Some people make their own miracles. \n", "1697 He needed to trust her with his secret. She ha... \n", "2444 NaN \n", "1696 She murders. So she can live. \n", "1690 NaN \n", "1689 NaN \n", "166 Until now, you've never really seen the world ... \n", "165 Only her desire can make him rise again \n", "395 Greatness is a state of mind. \n", "2451 I want my money plus interest.... and a present \n", "1687 NaN \n", "396 Witness the rise of a legend \n", "1683 NaN \n", "161 Stacy and Melissa are in search for the world'... \n", "397 The true story of one man's terrifying journey... \n", "2457 NaN \n", "2458 Fear your dreams. \n", "1667 Hunted by men ... Sought by WOMEN! \n", "1658 Every family has a black sheep. This one is blue. 
\n", "1654 NaN \n", "1821 NaN \n", "\n", " title \\\n", "2135 Pirates of the Caribbean: On Stranger Tides \n", "2209 Pirates of the Caribbean: At World's End \n", "2770 Avengers: Age of Ultron \n", "1425 Superman Returns \n", "1270 John Carter \n", "1630 Transformers: The Last Knight \n", "2532 The Hobbit: An Unexpected Journey \n", "961 Harry Potter and the Half-Blood Prince \n", "906 The Dark Knight Rises \n", "666 The Hobbit: The Desolation of Smaug \n", "2737 Spectre \n", "1249 The Chronicles of Narnia: Prince Caspian \n", "2357 Man of Steel \n", "1126 The Avengers \n", "1880 X-Men: The Last Stand \n", "2035 King Kong \n", "1222 Cars 2 \n", "763 World War Z \n", "1811 Terminator 3: Rise of the Machines \n", "881 Alice in Wonderland \n", "2562 Finding Dory \n", "1838 Quantum of Solace \n", "2126 Guardians of the Galaxy Vol. 2 \n", "2623 Valerian and the City of a Thousand Planets \n", "927 Jack the Giant Slayer \n", "2322 Transformers: Dark of the Moon \n", "1394 Tomorrowland \n", "1761 Furious 7 \n", "1673 The Dark Knight \n", "902 Indiana Jones and the Kingdom of the Crystal S... \n", "... ... \n", "2484 Once Upon a Time in China II \n", "152 Paperhouse \n", "151 Holy Hell \n", "1595 Coherence \n", "1638 L: Change the World \n", "1645 The Portrait of a Lady \n", "2463 The Measure of a Man \n", "1651 First Strike \n", "1701 On the Way to School \n", "1700 Lorenzo's Oil \n", "1697 Shining Through \n", "2444 The Pirates of Penzance \n", "1696 La Femme Nikita \n", "1690 The Dead Lands \n", "1689 A Gorgeous Girl Like Me \n", "166 Koyaanisqatsi \n", "165 Ghosts Can't Do It \n", "395 The Great Buck Howard \n", "2451 Dom Hemingway \n", "1687 Blue \n", "396 Kevin Hart: Let Me Explain \n", "1683 The Slugger's Wife \n", "161 Casual Sex? 
\n", "397 Communion \n", "2457 To the Wonder \n", "2458 Before I Wake \n", "1667 The Third Man \n", "1658 Kabluey \n", "1654 Code Name: Jackal \n", "1821 Cuban Rafters \n", "\n", " Keywords \\\n", "2135 [{'id': 658, 'name': 'sea'}, {'id': 1316, 'nam... \n", "2209 [{'id': 270, 'name': 'ocean'}, {'id': 726, 'na... \n", "2770 [{'id': 8828, 'name': 'marvel comic'}, {'id': ... \n", "1425 [{'id': 83, 'name': 'saving the world'}, {'id'... \n", "1270 [{'id': 818, 'name': 'based on novel'}, {'id':... \n", "1630 [{'id': 10466, 'name': 'knight'}, {'id': 10607... \n", "2532 [{'id': 483, 'name': 'riddle'}, {'id': 603, 'n... \n", "961 [{'id': 616, 'name': 'witch'}, {'id': 2343, 'n... \n", "906 [{'id': 849, 'name': 'dc comics'}, {'id': 853,... \n", "666 [{'id': 603, 'name': 'elves'}, {'id': 604, 'na... \n", "2737 [{'id': 470, 'name': 'spy'}, {'id': 818, 'name... \n", "1249 [{'id': 818, 'name': 'based on novel'}, {'id':... \n", "2357 [{'id': 83, 'name': 'saving the world'}, {'id'... \n", "1126 [{'id': 242, 'name': 'new york'}, {'id': 5539,... \n", "1880 [{'id': 1852, 'name': 'mutant'}, {'id': 8828, ... \n", "2035 [{'id': 774, 'name': 'film business'}, {'id': ... \n", "1222 [{'id': 830, 'name': 'car race'}, {'id': 9663,... \n", "763 [{'id': 4565, 'name': 'dystopia'}, {'id': 1233... \n", "1811 [{'id': 83, 'name': 'saving the world'}, {'id'... \n", "881 [{'id': 818, 'name': 'based on novel'}, {'id':... \n", "2562 [{'id': 1357, 'name': 'fish'}, {'id': 1453, 'n... \n", "1838 [{'id': 627, 'name': 'killing'}, {'id': 1568, ... \n", "2126 [{'id': 9663, 'name': 'sequel'}, {'id': 9715, ... \n", "2623 [{'id': 1370, 'name': 'shapeshifting'}, {'id':... \n", "927 [{'id': 179411, 'name': 'based on fairy tale'}... \n", "2322 [{'id': 305, 'name': 'moon'}, {'id': 1612, 'na... \n", "1394 [{'id': 1436, 'name': 'inventor'}, {'id': 1233... \n", "1761 [{'id': 830, 'name': 'car race'}, {'id': 3428,... \n", "1673 [{'id': 849, 'name': 'dc comics'}, {'id': 853,... 
\n", "902 [{'id': 83, 'name': 'saving the world'}, {'id'... \n", "... ... \n", "2484 [{'id': 478, 'name': 'china'}, {'id': 779, 'na... \n", "152 [{'id': 3030, 'name': 'nightmare'}, {'id': 609... \n", "151 [{'id': 162766, 'name': 'religious cult'}, {'i... \n", "1595 [{'id': 2340, 'name': 'paranoia'}, {'id': 4803... \n", "1638 [{'id': 9663, 'name': 'sequel'}] \n", "1645 [{'id': 1295, 'name': 'europe'}, {'id': 6054, ... \n", "2463 [{'id': 156052, 'name': 'unemployment'}] \n", "1651 [{'id': 720, 'name': 'helicopter'}, {'id': 779... \n", "1701 NaN \n", "1700 [{'id': 970, 'name': 'parent child relationshi... \n", "1697 [{'id': 74, 'name': 'germany'}, {'id': 818, 'n... \n", "2444 NaN \n", "1696 [{'id': 1308, 'name': 'secret identity'}, {'id... \n", "1690 [{'id': 2658, 'name': 'new zealand'}, {'id': 2... \n", "1689 [{'id': 4434, 'name': 'interview'}, {'id': 119... \n", "166 [{'id': 1576, 'name': 'technology'}, {'id': 21... \n", "165 NaN \n", "395 [{'id': 2594, 'name': 'magic show'}, {'id': 36... \n", "2451 [{'id': 9825, 'name': 'growing up'}, {'id': 10... \n", "1687 [{'id': 237, 'name': 'gay'}, {'id': 496, 'name... \n", "396 [{'id': 9716, 'name': 'stand-up comedy'}, {'id... \n", "1683 NaN \n", "161 [{'id': 65, 'name': 'holiday'}, {'id': 572, 'n... \n", "397 [{'id': 255, 'name': 'male nudity'}, {'id': 81... \n", "2457 NaN \n", "2458 [{'id': 1566, 'name': 'dream'}, {'id': 3030, '... \n", "1667 [{'id': 1201, 'name': 'austria'}, {'id': 1227,... \n", "1658 [{'id': 6732, 'name': 'mascot'}, {'id': 8438, ... \n", "1654 NaN \n", "1821 [{'id': 701, 'name': 'cuba'}, {'id': 8158, 'na... \n", "\n", " cast \\\n", "2135 [{'cast_id': 15, 'character': 'Captain Jack Sp... \n", "2209 [{'cast_id': 4, 'character': 'Captain Jack Spa... \n", "2770 [{'cast_id': 76, 'character': 'Tony Stark / Ir... \n", "1425 [{'cast_id': 3, 'character': 'Superman / Clark... \n", "1270 [{'cast_id': 5, 'character': 'John Carter', 'c... \n", "1630 [{'cast_id': 2, 'character': 'Cade Yeager', 'c... 
\n", "2532 [{'cast_id': 6, 'character': 'Gandalf', 'credi... \n", "961 [{'cast_id': 3, 'character': 'Harry Potter', '... \n", "906 NaN \n", "666 [{'cast_id': 3, 'character': 'Bilbo Baggins', ... \n", "2737 [{'cast_id': 1, 'character': 'James Bond', 'cr... \n", "1249 [{'cast_id': 1, 'character': 'Prince Caspian',... \n", "2357 [{'cast_id': 2, 'character': 'Clark Kent / Kal... \n", "1126 [{'cast_id': 46, 'character': 'Tony Stark / Ir... \n", "1880 [{'cast_id': 4, 'character': 'Logan / Wolverin... \n", "2035 [{'cast_id': 5, 'character': 'Ann Darrow', 'cr... \n", "1222 [{'cast_id': 4, 'character': 'Lightning McQuee... \n", "763 [{'cast_id': 4, 'character': 'Gerry Lane', 'cr... \n", "1811 [{'cast_id': 6, 'character': 'The Terminator',... \n", "881 [{'cast_id': 7, 'character': 'Alice Kingsleigh... \n", "2562 [{'cast_id': 2, 'character': 'Dory (voice)', '... \n", "1838 [{'cast_id': 1, 'character': 'James Bond', 'cr... \n", "2126 [{'cast_id': 3, 'character': 'Peter Quill / St... \n", "2623 NaN \n", "927 [{'cast_id': 7, 'character': 'Jack', 'credit_i... \n", "2322 [{'cast_id': 3, 'character': 'Sam Witwicky', '... \n", "1394 [{'cast_id': 17, 'character': 'Casey Newton', ... \n", "1761 [{'cast_id': 17, 'character': 'Dominic Toretto... \n", "1673 [{'cast_id': 35, 'character': 'Bruce Wayne / B... \n", "902 [{'cast_id': 4, 'character': 'Indiana Jones', ... \n", "... ... \n", "2484 [{'cast_id': 13, 'character': 'Wong Fei-hung',... \n", "152 [{'cast_id': 9, 'character': 'Anna Madden', 'c... \n", "151 [{'cast_id': 7, 'character': 'Himself', 'credi... \n", "1595 [{'cast_id': 10, 'character': 'Hugh', 'credit_... \n", "1638 [{'cast_id': 1, 'character': 'Watari', 'credit... \n", "1645 [{'cast_id': 1, 'character': 'Isabel Archer', ... \n", "2463 [{'cast_id': 1, 'character': 'Thierry Taugourd... \n", "1651 [{'cast_id': 1, 'character': 'Insp. Chan Ka Ku... \n", "1701 [{'cast_id': 5, 'character': 'Elle-m√™me', 'cr... \n", "1700 [{'cast_id': 13, 'character': 'Michaela Odone'... 
\n", "1697 [{'cast_id': 2, 'character': 'Ed Leland', 'cre... \n", "2444 [{'cast_id': 1, 'character': 'The Pirate King'... \n", "1696 [{'cast_id': 1, 'character': 'Nikita', 'credit... \n", "1690 [{'cast_id': 2, 'character': 'Hongi', 'credit_... \n", "1689 [{'cast_id': 6, 'character': 'Camille Bliss', ... \n", "166 [{'cast_id': 10, 'character': 'Himself (uncred... \n", "165 [{'cast_id': 1, 'character': \"Katie O'Dare Sco... \n", "395 [{'cast_id': 2, 'character': 'Buck Howard', 'c... \n", "2451 [{'cast_id': 2, 'character': 'Dom Hemingway', ... \n", "1687 [{'cast_id': 2, 'character': '', 'credit_id': ... \n", "396 [{'cast_id': 2, 'character': 'Himself', 'credi... \n", "1683 [{'cast_id': 3, 'character': 'Darryl Palmer', ... \n", "161 [{'cast_id': 1, 'character': 'Stacy', 'credit_... \n", "397 [{'cast_id': 4, 'character': 'Whitley Strieber... \n", "2457 [{'cast_id': 4, 'character': 'Neil', 'credit_i... \n", "2458 [{'cast_id': 1, 'character': 'Jessie', 'credit... \n", "1667 [{'cast_id': 8, 'character': 'Holly Martins', ... \n", "1658 [{'cast_id': 1, 'character': 'Leslie', 'credit... \n", "1654 [{'cast_id': 1, 'character': 'Bong Min-jung', ... \n", "1821 [{'cast_id': 5, 'character': 'Herself (archive... \n", "\n", " crew revenue \n", "2135 [{'credit_id': '566b4f54c3a3683f56005151', 'de... 1045713802 \n", "2209 [{'credit_id': '52fe4232c3a36847f800b579', 'de... 961000000 \n", "2770 [{'credit_id': '55d5f7d4c3a3683e7e0016eb', 'de... 1405403694 \n", "1425 [{'credit_id': '553bef6a9251416874003c8f', 'de... 391081192 \n", "1270 [{'credit_id': '52fe479ac3a36847f813eaa3', 'de... 284139100 \n", "1630 [{'credit_id': '5553e38bc3a368208f000502', 'de... 604942143 \n", "2532 [{'credit_id': '52fe4783c3a36847f8139f7f', 'de... 1021103568 \n", "961 [{'credit_id': '52fe4273c3a36847f801fab1', 'de... 933959197 \n", "906 NaN 1084939099 \n", "666 [{'credit_id': '5350e7b0c3a3681d93000e5d', 'de... 958400000 \n", "2737 [{'credit_id': '5751eed59251416b60000637', 'de... 
880674609 \n", "1249 [{'credit_id': '55a239e69251412979002e8b', 'de... 419651413 \n", "2357 [{'credit_id': '52fe4799c3a36847f813e601', 'de... 662845518 \n", "1126 [{'credit_id': '52fe4495c3a368484e02b1cf', 'de... 1519557910 \n", "1880 [{'credit_id': '538d82720e0a26670e005e83', 'de... 459359555 \n", "2035 [{'credit_id': '52fe422ec3a36847f800a1d7', 'de... 550000000 \n", "1222 [{'credit_id': '52fe477fc3a36847f8139271', 'de... 559852396 \n", "763 [{'credit_id': '570bbe9ec3a3684b50000391', 'de... 531865000 \n", "1811 [{'credit_id': '52fe4233c3a36847f800bc99', 'de... 435000000 \n", "881 [{'credit_id': '52fe44c09251416c7503fbc3', 'de... 1025491110 \n", "2562 [{'credit_id': '55eb2fbb92514106d60041ab', 'de... 1028570889 \n", "1838 [{'credit_id': '52fe43b29251416c7501aa63', 'de... 586090727 \n", "2126 [{'credit_id': '59171547925141583c0315a6', 'de... 863416141 \n", "2623 NaN 90024292 \n", "927 [{'credit_id': '52fe47ef9251416c9107aa0f', 'de... 197687603 \n", "2322 [{'credit_id': '537f0b740e0a2624b40044d0', 'de... 1123746996 \n", "1394 [{'credit_id': '55bb85e6c3a36869b400723c', 'de... 209154322 \n", "1761 [{'credit_id': '52fe4cc8c3a36847f823e681', 'de... 1506249360 \n", "1673 [{'credit_id': '55a0eb4a925141296b0010f8', 'de... 1004558444 \n", "902 [{'credit_id': '52fe4227c3a36847f800847d', 'de... 786636033 \n", "... ... ... \n", "2484 [{'credit_id': '52fe43959251416c75016245', 'de... 30399676 \n", "152 [{'credit_id': '52fe45e5c3a36847f80e0a31', 'de... 241 \n", "151 [{'credit_id': '57cab4999251411ac500279a', 'de... 16905 \n", "1595 [{'credit_id': '54e1c53fc3a36845410081dc', 'de... 102617 \n", "1638 [{'credit_id': '52fe43e1c3a368484e0039a7', 'de... 30231200 \n", "1645 [{'credit_id': '5762f0fec3a3684b6700003c', 'de... 3692836 \n", "2463 [{'credit_id': '553b87649251412c27001037', 'de... 106498 \n", "1651 [{'credit_id': '52fe44f3c3a36847f80b3b93', 'de... 21890845 \n", "1701 [{'credit_id': '52fe4de89251416c7514504d', 'de... 
7424 \n", "1700 [{'credit_id': '52fe432bc3a36847f803fa9f', 'de... 7286388 \n", "1697 [{'credit_id': '58fd30ef925141643c02713f', 'de... 21621000 \n", "2444 [{'credit_id': '57cf74b1c3a3684f230043e3', 'de... 694497 \n", "1696 [{'credit_id': '52fe44e7c3a36847f80b0f97', 'de... 5000000 \n", "1690 [{'credit_id': '53cf9579c3a368776d00734c', 'de... 5240 \n", "1689 [{'credit_id': '52fe43d3c3a36847f8072785', 'de... 457163 \n", "166 [{'credit_id': '52fe44249251416c7502a35b', 'de... 1723872 \n", "165 [{'credit_id': '52fe44709251416c91010a89', 'de... 25000 \n", "395 [{'credit_id': '54ad745f9251417538000081', 'de... 900689 \n", "2451 [{'credit_id': '548d71c592514122f90040b1', 'de... 523511 \n", "1687 [{'credit_id': '53cb0c80c3a3687775000626', 'de... 1718328 \n", "396 [{'credit_id': '5527585c92514115260005ef', 'de... 32230907 \n", "1683 [{'credit_id': '52fe499ac3a368484e13425b', 'de... 1878561 \n", "161 [{'credit_id': '52fe456f9251416c910322d7', 'de... 12277096 \n", "397 [{'credit_id': '573a39e09251415571000a4c', 'de... 1919653 \n", "2457 [{'credit_id': '5638a34a925141285701f217', 'de... 587615 \n", "2458 [{'credit_id': '547b481f9251412d7f00007a', 'de... 3295624 \n", "1667 [{'credit_id': '58d8fdf5c3a368121b065001', 'de... 596349 \n", "1658 [{'credit_id': '52fe466f9251416c75078339', 'de... 83398 \n", "1654 [{'credit_id': '52fe4c089251416c910ed7a3', 'de... 1261947 \n", "1821 [{'credit_id': '52fe456dc3a368484e05a2c3', 'de... 62153 \n", "\n", "[3000 rows x 23 columns]" ] }, "execution_count": 352, "metadata": {}, "output_type": "execute_result" } ], "source": [ "train.sort_values('budget', ascending=False)" ] }, { "cell_type": "code", "execution_count": 353, "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", 
" \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", 
" \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
idbelongs_to_collectionbudgetgenreshomepageimdb_idoriginal_languageoriginal_titleoverviewpopularity...release_dateruntimespoken_languagesstatustaglinetitleKeywordscastcrewrevenue
11261127[{'id': 86311, 'name': 'The Avengers Collectio...220000000[{'id': 878, 'name': 'Science Fiction'}, {'id'...http://marvel.com/avengers_movie/tt0848228enThe AvengersWhen an unexpected enemy emerges and threatens...89.887648...4/25/12143.0[{'iso_639_1': 'en', 'name': 'English'}]ReleasedSome assembly required.The Avengers[{'id': 242, 'name': 'new york'}, {'id': 5539,...[{'cast_id': 46, 'character': 'Tony Stark / Ir...[{'credit_id': '52fe4495c3a368484e02b1cf', 'de...1519557910
17611762[{'id': 9485, 'name': 'The Fast and the Furiou...190000000[{'id': 28, 'name': 'Action'}]http://www.furious7.com/tt2820852enFurious 7Deckard Shaw seeks revenge against Dominic Tor...27.275687...4/1/15137.0[{'iso_639_1': 'en', 'name': 'English'}]ReleasedVengeance Hits HomeFurious 7[{'id': 830, 'name': 'car race'}, {'id': 3428,...[{'cast_id': 17, 'character': 'Dominic Toretto...[{'credit_id': '52fe4cc8c3a36847f823e681', 'de...1506249360
27702771[{'id': 86311, 'name': 'The Avengers Collectio...280000000[{'id': 28, 'name': 'Action'}, {'id': 12, 'nam...http://marvel.com/movies/movie/193/avengers_ag...tt2395427enAvengers: Age of UltronWhen Tony Stark tries to jumpstart a dormant p...37.379420...4/22/15141.0[{'iso_639_1': 'en', 'name': 'English'}]ReleasedA New Age Has Come.Avengers: Age of Ultron[{'id': 8828, 'name': 'marvel comic'}, {'id': ...[{'cast_id': 76, 'character': 'Tony Stark / Ir...[{'credit_id': '55d5f7d4c3a3683e7e0016eb', 'de...1405403694
684685NaN160000000[{'id': 10751, 'name': 'Family'}, {'id': 14, '...http://movies.disney.com/beauty-and-the-beast-...tt2771200enBeauty and the BeastA live-action adaptation of Disney's version o...287.253654...3/16/17129.0[{'iso_639_1': 'en', 'name': 'English'}]ReleasedBe our guest.Beauty and the Beast[{'id': 254, 'name': 'france'}, {'id': 2343, '...[{'cast_id': 174, 'character': 'Belle', 'credi...[{'credit_id': '551879bec3a3681f840004eb', 'de...1262886337
23222323[{'id': 8650, 'name': 'Transformers Collection...195000000[{'id': 28, 'name': 'Action'}, {'id': 878, 'na...http://www.transformersmovie.com/tt1399103enTransformers: Dark of the MoonSam Witwicky takes his first tenuous steps int...4.503505...6/28/11154.0[{'iso_639_1': 'en', 'name': 'English'}]ReleasedThe invasion we always feared. An enemy we nev...Transformers: Dark of the Moon[{'id': 305, 'name': 'moon'}, {'id': 1612, 'na...[{'cast_id': 3, 'character': 'Sam Witwicky', '...[{'credit_id': '537f0b740e0a2624b40044d0', 'de...1123746996
906907[{'id': 263, 'name': 'The Dark Knight Collecti...250000000[{'id': 28, 'name': 'Action'}, {'id': 80, 'nam...http://www.thedarkknightrises.com/tt1345836enThe Dark Knight RisesFollowing the death of District Attorney Harve...20.582580...7/16/12165.0[{'iso_639_1': 'en', 'name': 'English'}]ReleasedThe Legend EndsThe Dark Knight Rises[{'id': 849, 'name': 'dc comics'}, {'id': 853,...NaNNaN1084939099
21352136[{'id': 295, 'name': 'Pirates of the Caribbean...380000000[{'id': 12, 'name': 'Adventure'}, {'id': 28, '...http://disney.go.com/pirates/index-on-stranger...tt1298650enPirates of the Caribbean: On Stranger TidesCaptain Jack Sparrow crosses paths with a woma...27.887720...5/14/11136.0[{'iso_639_1': 'en', 'name': 'English'}, {'iso...ReleasedLive Forever Or Die Trying.Pirates of the Caribbean: On Stranger Tides[{'id': 658, 'name': 'sea'}, {'id': 1316, 'nam...[{'cast_id': 15, 'character': 'Captain Jack Sp...[{'credit_id': '566b4f54c3a3683f56005151', 'de...1045713802
25622563[{'id': 137697, 'name': 'Finding Nemo Collecti...200000000[{'id': 12, 'name': 'Adventure'}, {'id': 16, '...http://movies.disney.com/finding-dorytt2277860enFinding DoryDory is reunited with her friends Nemo and Mar...14.477677...6/16/1697.0[{'iso_639_1': 'en', 'name': 'English'}]ReleasedAn unforgettable journey she probably won't re...Finding Dory[{'id': 1357, 'name': 'fish'}, {'id': 1453, 'n...[{'cast_id': 2, 'character': 'Dory (voice)', '...[{'credit_id': '55eb2fbb92514106d60041ab', 'de...1028570889
881882[{'id': 261307, 'name': 'Alice in Wonderland C...200000000[{'id': 10751, 'name': 'Family'}, {'id': 14, '...http://disney.go.com/wonderland/tt1014759enAlice in WonderlandAlice, an unpretentious and individual 19-year...17.285093...3/3/10108.0[{'iso_639_1': 'en', 'name': 'English'}]ReleasedYou're invited to a very important date.Alice in Wonderland[{'id': 818, 'name': 'based on novel'}, {'id':...[{'cast_id': 7, 'character': 'Alice Kingsleigh...[{'credit_id': '52fe44c09251416c7503fbc3', 'de...1025491110
734735NaN150000000[{'id': 16, 'name': 'Animation'}, {'id': 12, '...http://movies.disney.com/zootopiatt2948356enZootopiaDetermined to prove herself, Officer Judy Hopp...26.024868...2/11/16108.0[{'iso_639_1': 'en', 'name': 'English'}]ReleasedWelcome to the urban jungle.Zootopia[{'id': 2527, 'name': 'fox'}, {'id': 10360, 'n...[{'cast_id': 23, 'character': 'Judy Hopps (voi...[{'credit_id': '536e09650e0a2647cb00fe6b', 'de...1023784195
25322533[{'id': 121938, 'name': 'The Hobbit Collection...250000000[{'id': 12, 'name': 'Adventure'}, {'id': 14, '...http://www.thehobbit.com/tt0903624enThe Hobbit: An Unexpected JourneyBilbo Baggins, a hobbit enjoying his quiet lif...23.253089...11/26/12169.0[{'iso_639_1': 'en', 'name': 'English'}]ReleasedFrom the smallest beginnings come the greatest...The Hobbit: An Unexpected Journey[{'id': 483, 'name': 'riddle'}, {'id': 603, 'n...[{'cast_id': 6, 'character': 'Gandalf', 'credi...[{'credit_id': '52fe4783c3a36847f8139f7f', 'de...1021103568
16731674[{'id': 263, 'name': 'The Dark Knight Collecti...185000000[{'id': 18, 'name': 'Drama'}, {'id': 28, 'name...http://thedarkknight.warnerbros.com/dvdsite/tt0468569enThe Dark KnightBatman raises the stakes in his war on crime. ...123.167259...7/16/08152.0[{'iso_639_1': 'en', 'name': 'English'}, {'iso...ReleasedWhy So Serious?The Dark Knight[{'id': 849, 'name': 'dc comics'}, {'id': 853,...[{'cast_id': 35, 'character': 'Bruce Wayne / B...[{'credit_id': '55a0eb4a925141296b0010f8', 'de...1004558444
22092210[{'id': 295, 'name': 'Pirates of the Caribbean...300000000[{'id': 12, 'name': 'Adventure'}, {'id': 14, '...http://disney.go.com/disneypictures/pirates/tt0449088enPirates of the Caribbean: At World's EndCaptain Barbossa, long believed to be dead, ha...31.363664...5/19/07169.0[{'iso_639_1': 'en', 'name': 'English'}]ReleasedAt the end of the world, the adventure begins.Pirates of the Caribbean: At World's End[{'id': 270, 'name': 'ocean'}, {'id': 726, 'na...[{'cast_id': 4, 'character': 'Captain Jack Spa...[{'credit_id': '52fe4232c3a36847f800b579', 'de...961000000
666667[{'id': 121938, 'name': 'The Hobbit Collection...250000000[{'id': 12, 'name': 'Adventure'}, {'id': 14, '...http://www.thehobbit.com/tt1170358enThe Hobbit: The Desolation of SmaugThe Dwarves, Bilbo and Gandalf have successful...20.644776...12/11/13161.0[{'iso_639_1': 'en', 'name': 'English'}]ReleasedBeyond darkness... beyond desolation... lies t...The Hobbit: The Desolation of Smaug[{'id': 603, 'name': 'elves'}, {'id': 604, 'na...[{'cast_id': 3, 'character': 'Bilbo Baggins', ...[{'credit_id': '5350e7b0c3a3681d93000e5d', 'de...958400000
961962[{'id': 1241, 'name': 'Harry Potter Collection...250000000[{'id': 12, 'name': 'Adventure'}, {'id': 14, '...http://harrypotter.warnerbros.com/harrypottera...tt0417741enHarry Potter and the Half-Blood PrinceAs Harry begins his sixth year at Hogwarts, he...19.083723...7/7/09153.0[{'iso_639_1': 'en', 'name': 'English'}]ReleasedDark Secrets RevealedHarry Potter and the Half-Blood Prince[{'id': 616, 'name': 'witch'}, {'id': 2343, 'n...[{'cast_id': 3, 'character': 'Harry Potter', '...[{'credit_id': '52fe4273c3a36847f801fab1', 'de...933959197
543544[{'id': 119, 'name': 'The Lord of the Rings Co...79000000[{'id': 12, 'name': 'Adventure'}, {'id': 14, '...http://www.lordoftherings.net/tt0167261enThe Lord of the Rings: The Two TowersFrodo and Sam are trekking to Mordor to destro...29.423537...12/18/02179.0[{'iso_639_1': 'en', 'name': 'English'}]ReleasedA New Power Is Rising.The Lord of the Rings: The Two Towers[{'id': 603, 'name': 'elves'}, {'id': 606, 'na...[{'cast_id': 13, 'character': 'Frodo Baggins',...[{'credit_id': '52fe421ac3a36847f800454f', 'de...926287400
17351736[{'id': 328, 'name': 'Jurassic Park Collection...63000000[{'id': 12, 'name': 'Adventure'}, {'id': 878, ...http://www.jurassicpark.com/tt0107290enJurassic ParkA wealthy entrepreneur secretly creates a them...8.863776...6/11/93127.0[{'iso_639_1': 'en', 'name': 'English'}, {'iso...ReleasedAn adventure 65 million years in the making.Jurassic Park[{'id': 911, 'name': 'exotic island'}, {'id': ...[{'cast_id': 4, 'character': 'Dr. Alan Grant',...[{'credit_id': '52fe4238c3a36847f800d291', 'de...920100000
23872388[{'id': 8354, 'name': 'Ice Age Collection', 'p...90000000[{'id': 16, 'name': 'Animation'}, {'id': 35, '...http://www.iceagemovies.com/films/ice-age-dawn...tt1080016enIce Age: Dawn of the DinosaursTimes are changing for Manny the moody mammoth...12.980624...6/29/0994.0[{'iso_639_1': 'en', 'name': 'English'}]ReleasedYou Won't Believe Your Ice!Ice Age: Dawn of the Dinosaurs[{'id': 2219, 'name': 'ice age'}, {'id': 3450,...[{'cast_id': 2, 'character': 'Manny (voice)', ...[{'credit_id': '52fe44a2c3a36847f80a13a1', 'de...886686817
27372738[{'id': 645, 'name': 'James Bond Collection', ...245000000[{'id': 28, 'name': 'Action'}, {'id': 12, 'nam...http://www.sonypictures.com/movies/spectre/tt2379713enSpectreA cryptic message from Bond‚Äôs past sends him...24.926577...10/26/15148.0[{'iso_639_1': 'fr', 'name': 'Français'}, {'is...ReleasedA Plan No One EscapesSpectre[{'id': 470, 'name': 'spy'}, {'id': 818, 'name...[{'cast_id': 1, 'character': 'James Bond', 'cr...[{'credit_id': '5751eed59251416b60000637', 'de...880674609
28022803[{'id': 1241, 'name': 'Harry Potter Collection...100000000[{'id': 12, 'name': 'Adventure'}, {'id': 14, '...NaNtt0295297enHarry Potter and the Chamber of SecretsIgnoring threats to his life, Harry returns to...29.741452...11/13/02161.0[{'iso_639_1': 'en', 'name': 'English'}]ReleasedHogwarts is back in session.Harry Potter and the Chamber of Secrets[{'id': 391, 'name': 'flying car'}, {'id': 616...[{'cast_id': 23, 'character': 'Harry Potter', ...[{'credit_id': '52fe4267c3a36847f801bf67', 'de...876688482
114115[{'id': 119, 'name': 'The Lord of the Rings Co...93000000[{'id': 12, 'name': 'Adventure'}, {'id': 14, '...http://www.lordoftherings.net/tt0120737enThe Lord of the Rings: The Fellowship of the RingYoung hobbit Frodo Baggins, after inheriting a...32.070725...12/18/01178.0[{'iso_639_1': 'en', 'name': 'English'}]ReleasedOne ring to rule them allThe Lord of the Rings: The Fellowship of the Ring[{'id': 603, 'name': 'elves'}, {'id': 604, 'na...[{'cast_id': 28, 'character': 'Frodo Baggins',...[{'credit_id': '52fe421ac3a36847f80043ef', 'de...871368364
21262127[{'id': 284433, 'name': 'Guardians of the Gala...200000000[{'id': 28, 'name': 'Action'}, {'id': 12, 'nam...http://marvel.com/movies/movie/221/guardians_o...tt3896198enGuardians of the Galaxy Vol. 2The Guardians must fight to keep their newfoun...185.330992...4/19/17137.0[{'iso_639_1': 'en', 'name': 'English'}]ReleasedObviously.Guardians of the Galaxy Vol. 2[{'id': 9663, 'name': 'sequel'}, {'id': 9715, ...[{'cast_id': 3, 'character': 'Peter Quill / St...[{'credit_id': '59171547925141583c0315a6', 'de...863416141
689690[{'id': 10, 'name': 'Star Wars Collection', 'p...113000000[{'id': 878, 'name': 'Science Fiction'}, {'id'...http://www.starwars.com/films/star-wars-episod...tt0121766enStar Wars: Episode III - Revenge of the SithYears after the onset of the Clone Wars, the n...13.165421...5/17/05140.0[{'iso_639_1': 'en', 'name': 'English'}]ReleasedThe saga is complete.Star Wars: Episode III - Revenge of the Sith[{'id': 797, 'name': 'showdown'}, {'id': 10013...[{'cast_id': 13, 'character': 'Obi-Wan Kenobi'...[{'credit_id': '52fe431fc3a36847f803bea3', 'de...850000000
13321333[{'id': 8650, 'name': 'Transformers Collection...150000000[{'id': 878, 'name': 'Science Fiction'}, {'id'...http://www.transformersmovie.com/tt1055369enTransformers: Revenge of the FallenSam Witwicky leaves the Autobots behind for a ...4.351756...6/19/09150.0[{'iso_639_1': 'en', 'name': 'English'}, {'iso...ReleasedRevenge is coming.Transformers: Revenge of the Fallen[{'id': 1160, 'name': 'egypt'}, {'id': 1670, '...[{'cast_id': 5, 'character': 'Sam Witwicky', '...[{'credit_id': '537f0791c3a3680598002d4a', 'de...836297228
518519[{'id': 468552, 'name': 'Wonder Woman Collecti...149000000[{'id': 28, 'name': 'Action'}, {'id': 12, 'nam...http://www.warnerbros.com/wonder-womantt0451279enWonder WomanAn Amazon princess comes to the world of Man t...294.337037...5/30/17141.0[{'iso_639_1': 'de', 'name': 'Deutsch'}, {'iso...ReleasedPower. Grace. Wisdom. Wonder.Wonder Woman[{'id': 849, 'name': 'dc comics'}, {'id': 1701...NaNNaN820580447
357358[{'id': 94032, 'name': 'The Lion King Collecti...45000000[{'id': 10751, 'name': 'Family'}, {'id': 16, '...http://movies.disney.com/the-lion-kingtt0110357enThe Lion KingA young lion cub named Simba can't wait to be ...21.605761...6/23/9489.0[{'iso_639_1': 'en', 'name': 'English'}]ReleasedLife's greatest adventure is finding your plac...The Lion King[{'id': 2143, 'name': 'loss of parents'}, {'id...[{'cast_id': 8, 'character': 'Young Simba (voi...[{'credit_id': '59161a6bc3a36842690252b9', 'de...788241776
902903[{'id': 84, 'name': 'Indiana Jones Collection'...185000000[{'id': 12, 'name': 'Adventure'}, {'id': 28, '...http://www.indianajones.com/site/index.htmltt0367882enIndiana Jones and the Kingdom of the Crystal S...Set during the Cold War, the Soviets – led b...12.577266...5/21/08122.0[{'iso_639_1': 'en', 'name': 'English'}, {'iso...ReleasedThe adventure continues . . .Indiana Jones and the Kingdom of the Crystal S...[{'id': 83, 'name': 'saving the world'}, {'id'...[{'cast_id': 4, 'character': 'Indiana Jones', ...[{'credit_id': '52fe4227c3a36847f800847d', 'de...786636033
23262327[{'id': 448150, 'name': 'Deadpool Collection',...58000000[{'id': 28, 'name': 'Action'}, {'id': 12, 'nam...http://www.foxmovies.com/movies/deadpooltt1431045enDeadpoolDeadpool tells the origin story of former Spec...187.860492...2/9/16108.0[{'iso_639_1': 'en', 'name': 'English'}]ReleasedWitness the beginning of a happy endingDeadpool[{'id': 2095, 'name': 'anti hero'}, {'id': 307...[{'cast_id': 99, 'character': 'Wade Wilson / D...[{'credit_id': '57e564fac3a3681beb0000cc', 'de...783112979
20182019[{'id': 284433, 'name': 'Guardians of the Gala...170000000[{'id': 28, 'name': 'Action'}, {'id': 878, 'na...http://marvel.com/guardianstt2015381enGuardians of the GalaxyLight years from Earth, 26 years after being a...53.291601...7/30/14121.0[{'iso_639_1': 'en', 'name': 'English'}]ReleasedAll heroes start somewhere.Guardians of the Galaxy[{'id': 8828, 'name': 'marvel comic'}, {'id': ...[{'cast_id': 1, 'character': 'Peter Quill / St...[{'credit_id': '538ce329c3a3687155003358', 'de...773328629
941942NaN180000000[{'id': 14, 'name': 'Fantasy'}, {'id': 12, 'na...http://movies.disney.com/maleficenttt1587310enMaleficentThe untold story of Disney's most iconic villa...19.467404...5/28/1497.0[{'iso_639_1': 'en', 'name': 'English'}]ReleasedDon't believe the fairy tale.Maleficent[{'id': 3205, 'name': 'fairy tale'}, {'id': 32...[{'cast_id': 7, 'character': 'Maleficent', 'cr...[{'credit_id': '52fe4a0cc3a36847f81b5c9f', 'de...758539785
..................................................................
10061007NaN2000000[{'id': 53, 'name': 'Thriller'}]NaNtt0429277enZyzzyx RoadA married man (Leo Grillo) meets a beautiful w...0.717595...2/25/0690.0[{'iso_639_1': 'en', 'name': 'English'}]ReleasedDead AheadZyzzyx Road[{'id': 9937, 'name': 'suspense'}][{'cast_id': 1, 'character': 'Marissa', 'credi...[{'credit_id': '52fe45c6c3a36847f80d9bd3', 'de...30
28742875NaN0[{'id': 35, 'name': 'Comedy'}, {'id': 10749, '...http://www.edwardburns.net/tt1619037enNice Guy JohnnyJohnny Rizzo, is about to trade his dream job ...2.166081...11/4/1089.0[{'iso_639_1': 'en', 'name': 'English'}]ReleasedNaNNice Guy Johnny[{'id': 966, 'name': 'beach'}, {'id': 2848, 'n...[{'cast_id': 3, 'character': 'Johnny Rizzo', '...[{'credit_id': '52fe466cc3a36847f80fe3c1', 'de...25
498499NaN0[{'id': 99, 'name': 'Documentary'}]NaNtt0841119enLake of FireAn unflinching look at the how the battle over...0.915050...9/9/06152.0[{'iso_639_1': 'en', 'name': 'English'}]ReleasedExploring the Issue that Divides the WorldLake of FireNaN[{'cast_id': 3, 'character': 'Himself', 'credi...[{'credit_id': '52fe4681c3a36847f8102b29', 'de...25
20322033NaN4[{'id': 18, 'name': 'Drama'}, {'id': 10749, 'n...NaNtt0022879enA Farewell to ArmsBritish nurse Catherine Barkley (Helen Hayes) ...1.914697...12/8/3289.0[{'iso_639_1': 'en', 'name': 'English'}]ReleasedEvery woman who has loved will understandA Farewell to Arms[{'id': 131, 'name': 'italy'}, {'id': 428, 'na...[{'cast_id': 1, 'character': 'Catherine Barkle...[{'credit_id': '56d73d3e9251414291002436', 'de...25
18841885NaN12000000[{'id': 9648, 'name': 'Mystery'}, {'id': 53, '...NaNtt0199626enIn the CutFollowing the gruesome murder of a young woman...5.799628...9/9/03119.0[{'iso_639_1': 'en', 'name': 'English'}]ReleasedEverything you know about desire is dead wrong.In the Cut[{'id': 1664, 'name': 'eroticism'}, {'id': 993...[{'cast_id': 1, 'character': 'Frannie', 'credi...[{'credit_id': '5628abae9251414cd8001453', 'de...23
269270NaN0[{'id': 99, 'name': 'Documentary'}, {'id': 107...http://www.glassthemovie.com/Home.htmltt1092004enGlass: A Portrait of Philip in Twelve PartsAcademy Award®-nominated director Scott Hicks ...0.498877...9/7/07119.0[{'iso_639_1': 'en', 'name': 'English'}]ReleasedIn July 2005, filmmaker Scott Hicks started sh...Glass: A Portrait of Philip in Twelve PartsNaN[{'cast_id': 2, 'character': 'Himself', 'credi...[{'credit_id': '52fe472e9251416c9106caeb', 'de...20
150151NaN0[{'id': 12, 'name': 'Adventure'}, {'id': 18, '...NaNtt0081760enWindwalkerAn ancient Indian warrior who has reached the ...1.178642...12/12/80108.0NaNReleasedHe conquered love and death...now he walks the...Windwalker[{'id': 1262, 'name': 'mountain'}, {'id': 1930...[{'cast_id': 5, 'character': 'Windwalker', 'cr...[{'credit_id': '52fe494b9251416c910a8add', 'de...18
20902091NaN10000000[{'id': 80, 'name': 'Crime'}, {'id': 18, 'name...NaNtt0106684enDeadfallAfter he accidentally kills his father, Mike, ...1.145806...10/8/9398.0[{'iso_639_1': 'en', 'name': 'English'}, {'iso...Released...The ultimate conDeadfall[{'id': 9826, 'name': 'murder'}, {'id': 15015,...[{'cast_id': 3, 'character': 'Joe Donan', 'cre...[{'credit_id': '52fe45449251416c9102c71f', 'de...18
23992400NaN0[{'id': 53, 'name': 'Thriller'}, {'id': 18, 'n...http://www.kaiji-movie.jptt1904937jaカイジ2 人生奪回ゲーム3 years after the ultimate life-or-death game ...1.133760...11/5/11133.0[{'iso_639_1': 'ja', 'name': '日本語'}]ReleasedNaNKaiji 2: The Ultimate GamblerNaN[{'cast_id': 1, 'character': 'Kaiji Ito', 'cre...[{'credit_id': '52fe4a68c3a36847f81cc20f', 'de...15
27592760NaN0[{'id': 28, 'name': 'Action'}, {'id': 35, 'nam...NaNtt0112909enDou Sing 2 - Gai Tau Dou SingLui is the tyrant of gamblers in Macau. He put...0.084720...6/28/9598.0[{'iso_639_1': 'cn', 'name': '广州话 / 廣州話'}]ReleasedNaNThe Saint of Gamblers[{'id': 395, 'name': 'gambling'}][{'cast_id': 3, 'character': 'God Bless You', ...[{'credit_id': '52fe48bf9251416c750b1f75', 'de...13
450451NaN23000000[{'id': 35, 'name': 'Comedy'}, {'id': 10749, '...NaNtt0360139enChasing LibertyThe President's daughter, unable to experience...5.950792...1/9/04111.0[{'iso_639_1': 'en', 'name': 'English'}, {'iso...ReleasedHow do you fall in love with the whole world w...Chasing Liberty[{'id': 110, 'name': 'venice'}, {'id': 220, 'n...[{'cast_id': 1, 'character': 'Anna Foster', 'c...[{'credit_id': '563c813992514150af00414f', 'de...12
312313NaN16000000[{'id': 35, 'name': 'Comedy'}, {'id': 18, 'nam...NaNtt0380277enThe CookoutWhen Todd Anderson signs a $30 million deal wi...1.758079...9/3/0497.0[{'iso_639_1': 'en', 'name': 'English'}]ReleasedThis summer, get your grill on!The Cookout[{'id': 10267, 'name': 'comedy'}][{'cast_id': 1, 'character': 'blin bling', 'cr...[{'credit_id': '52fe477f9251416c7509b8ed', 'de...12
11411142NaN0[{'id': 18, 'name': 'Drama'}, {'id': 10769, 'n...NaNtt1922561ko북촌방향Sang-Joon is a professor in the film departmen...1.042432...9/8/1179.0[{'iso_639_1': 'ko', 'name': '한국어/조선말'}]ReleasedNaNThe Day He ArrivesNaN[{'cast_id': 3, 'character': 'Sungjoon', 'cred...[{'credit_id': '52fe49499251416c910a86f1', 'de...11
665666[{'id': 86668, 'name': 'Elektra Luxx Collectio...0[{'id': 28, 'name': 'Action'}, {'id': 35, 'nam...http://www.elektraluxx-movie.com/tt1340773enElektra LuxxA favor for a woman from her past throws the l...4.118699...3/14/1098.0[{'iso_639_1': 'en', 'name': 'English'}]ReleasedFrom adult film star to functioning adult.Elektra Luxx[{'id': 10183, 'name': 'independent film'}, {'...[{'cast_id': 3, 'character': 'Elektra Luxx', '...[{'credit_id': '52fe48f6c3a36847f818264f', 'de...10
280281[{'id': 381396, 'name': 'Bats Collection', 'po...0[{'id': 27, 'name': 'Horror'}, {'id': 53, 'nam...NaNtt0200469enBatsGenetically mutated bats escape and it's up to...2.584025...10/22/9991.0[{'iso_639_1': 'en', 'name': 'English'}]ReleasedThey will suck you dryBats[{'id': 2766, 'name': 'mutation'}, {'id': 5155...[{'cast_id': 1, 'character': 'Sheriff Emmett K...[{'credit_id': '52fe437b9251416c750120e3', 'de...10
22512252NaN130[{'id': 18, 'name': 'Drama'}, {'id': 28, 'name...NaNtt1729637hiBodyguardLovely Singh (Salman Khan) is the bodyguard of...2.402032...8/31/11130.0[{'iso_639_1': 'hi', 'name': 'हिन्दी'}]ReleasedNaNBodyguard[{'id': 2251, 'name': 'lie'}, {'id': 2913, 'na...[{'cast_id': 1, 'character': 'Lovely B. Singh'...[{'credit_id': '52fe48a1c3a368484e102d25', 'de...8
639640NaN6[{'id': 18, 'name': 'Drama'}, {'id': 10749, 'n...NaNtt0183659enPollockIn August of 1949, Life Magazine ran a banner ...11.179864...9/6/00122.0[{'iso_639_1': 'en', 'name': 'English'}]ReleasedA true portrait of life and art.Pollock[{'id': 2679, 'name': 'artist'}, {'id': 3836, ...[{'cast_id': 1, 'character': 'Jackson Pollock'...[{'credit_id': '52fe44f19251416c75046447', 'de...8
25822583NaN0[{'id': 28, 'name': 'Action'}, {'id': 35, 'nam...NaNtt0339230svKoppsA small Swedish village, Högboträsk, is so p...2.939129...2/6/0390.0[{'iso_639_1': 'en', 'name': 'English'}, {'iso...ReleasedNaNKops[{'id': 5543, 'name': 'cow'}, {'id': 7490, 'na...[{'cast_id': 1, 'character': 'Jacob', 'credit_...[{'credit_id': '52fe44ddc3a36847f80ae983', 'de...8
11901191NaN0[{'id': 16, 'name': 'Animation'}, {'id': 10751...NaNtt0089984enHe-Man and She-Ra: The Secret of the SwordAfter experiencing traumatic nightmares of a t...2.087446...1/1/85100.0[{'iso_639_1': 'en', 'name': 'English'}, {'iso...ReleasedNaNHe-Man and She-Ra: The Secret of the Sword[{'id': 1400, 'name': 'swordplay'}, {'id': 193...[{'cast_id': 13, 'character': 'Prince Adam / H...[{'credit_id': '52fe47d59251416c750a6ef7', 'de...7
24902491NaN6400000[{'id': 53, 'name': 'Thriller'}, {'id': 10749,...NaNtt0113965enNever Talk to StrangersSarah Taylor, a police psychologist, meets a m...7.506958...10/20/9586.0[{'iso_639_1': 'en', 'name': 'English'}]ReleasedIn A World Where Love Isn't Always Safe, Trust...Never Talk to Strangers[{'id': 6103, 'name': 'telephone'}, {'id': 614...[{'cast_id': 1, 'character': 'Dr. Sarah Taylor...[{'credit_id': '52fe4775c3a36847f8136a6d', 'de...6
13461347NaN1[{'id': 18, 'name': 'Drama'}]NaNtt0048028enEast of EdenIn the Salinas Valley, in and around World War...6.428830...3/9/55115.0[{'iso_639_1': 'en', 'name': 'English'}]ReleasedOf what a girl did . . . what a boy did ... of...East of Eden[{'id': 30, 'name': 'individual'}, {'id': 380,...[{'cast_id': 4, 'character': 'Caleb Trask', 'c...[{'credit_id': '52fe4228c3a36847f800869d', 'de...5
21172118NaN344[{'id': 35, 'name': 'Comedy'}, {'id': 10749, '...http://www.americanadobo.comtt0294289enAmerican AdoboThe everyday struggles of people trying to bri...0.158651...9/29/01104.0[{'iso_639_1': 'en', 'name': 'English'}, {'iso...ReleasedSex is the appetizer. Love is the main course....American Adobo[{'id': 10183, 'name': 'independent film'}, {'...[{'cast_id': 1, 'character': '', 'credit_id': ...[{'credit_id': '52fe47d8c3a36847f814b4fd', 'de...4
334335NaN1[{'id': 28, 'name': 'Action'}, {'id': 18, 'nam...NaNtt0376144taSaamySaamy movie is all about Arusaamy (Vikram) who...0.438490...5/5/03140.0[{'iso_639_1': 'ta', 'name': 'தமிழ்'}]ReleasedNaNSaamyNaN[{'cast_id': 11, 'character': 'Aaruchamy', 'cr...[{'credit_id': '52fe4782c3a36847f813999f', 'de...3
15411542NaN750000[{'id': 80, 'name': 'Crime'}, {'id': 35, 'name...NaNtt3805180ruВсе и сразуNaN0.201582...6/5/140.0[{'iso_639_1': 'ru', 'name': 'Pусский'}]ReleasedNaNAll at Once[{'id': 642, 'name': 'robbery'}, {'id': 231149...[{'cast_id': 3, 'character': 'Viktor', 'credit...[{'credit_id': '53b7a7b90e0a2676b8006ab6', 'de...3
23832384[{'id': 369004, 'name': 'Borsalino Collection'...0[{'id': 80, 'name': 'Crime'}, {'id': 18, 'name...NaNtt0065486frBorsalinoIn 1930 Marseilles two small-time crooks join ...4.471645...5/19/70126.0[{'iso_639_1': 'fr', 'name': 'Français'}, {'is...ReleasedNaNBorsalinoNaN[{'cast_id': 1, 'character': 'Fran√ßois Capell...[{'credit_id': '531cd71cc3a3685c31006e82', 'de...3
695696NaN1[{'id': 10749, 'name': 'Romance'}, {'id': 53, ...NaNtt0374271hiTere NaamLower Caste Radhey Mohan is a no good slacker,...1.280455...8/15/03132.0[{'iso_639_1': 'hi', 'name': 'हिन्दी'}]ReleasedUnfortunately a true love storyTere NaamNaN[{'cast_id': 3, 'character': 'Radhe Mohan', 'c...[{'credit_id': '52fe491ec3a36847f8189cfb', 'de...2
19171918NaN592[{'id': 10749, 'name': 'Romance'}, {'id': 18, ...NaNtt0016104enThe Merry WidowPrince Danilo falls in love with dancer Sally ...0.286719...8/26/25137.0NaNReleasedNaNThe Merry Widow[{'id': 1691, 'name': 'dance'}, {'id': 10181, ...[{'cast_id': 2, 'character': \"Sally O'Hara\", '...[{'credit_id': '57351170c3a36802410000d5', 'de...1
18741875NaN0[{'id': 80, 'name': 'Crime'}, {'id': 18, 'name...http://www.vt4.be/vermisttt1133617enVermistFive members of the Missing Persons Task Force...0.229233...1/1/070.0[{'iso_639_1': 'nl', 'name': 'Nederlands'}]ReleasedNaNMissingNaN[{'cast_id': 2, 'character': 'Murat Hoxha', 'c...[{'credit_id': '52fe4838c3a36847f815cef1', 'de...1
347348NaN12[{'id': 14, 'name': 'Fantasy'}, {'id': 18, 'na...NaNtt0118172enThe Wind in the WillowsJailed for his reckless driving, rambunctious ...1.410322...10/16/9684.0[{'iso_639_1': 'en', 'name': 'English'}, {'iso...ReleasedGo wild in the country!The Wind in the WillowsNaN[{'cast_id': 2, 'character': 'Toad', 'credit_i...[{'credit_id': '59ce7655c3a3686aca01a522', 'de...1
17541755NaN2[{'id': 53, 'name': 'Thriller'}, {'id': 10769,...NaNtt0110604enMute WitnessBilly is mute, but it hasn't kept her from bec...2.466066...9/28/9595.0[{'iso_639_1': 'ru', 'name': 'Pусский'}, {'iso...ReleasedShe Can't Speak. She Can't Scream. She Can't B...Mute Witness[{'id': 3713, 'name': 'chase'}, {'id': 6149, '...[{'cast_id': 22, 'character': 'Billy Hughes', ...[{'credit_id': '52fe4775c3a36847f8136af1', 'de...1
\n", "

3000 rows × 23 columns

\n", "
" ], "text/plain": [ " id belongs_to_collection budget \\\n", "1126 1127 [{'id': 86311, 'name': 'The Avengers Collectio... 220000000 \n", "1761 1762 [{'id': 9485, 'name': 'The Fast and the Furiou... 190000000 \n", "2770 2771 [{'id': 86311, 'name': 'The Avengers Collectio... 280000000 \n", "684 685 NaN 160000000 \n", "2322 2323 [{'id': 8650, 'name': 'Transformers Collection... 195000000 \n", "906 907 [{'id': 263, 'name': 'The Dark Knight Collecti... 250000000 \n", "2135 2136 [{'id': 295, 'name': 'Pirates of the Caribbean... 380000000 \n", "2562 2563 [{'id': 137697, 'name': 'Finding Nemo Collecti... 200000000 \n", "881 882 [{'id': 261307, 'name': 'Alice in Wonderland C... 200000000 \n", "734 735 NaN 150000000 \n", "2532 2533 [{'id': 121938, 'name': 'The Hobbit Collection... 250000000 \n", "1673 1674 [{'id': 263, 'name': 'The Dark Knight Collecti... 185000000 \n", "2209 2210 [{'id': 295, 'name': 'Pirates of the Caribbean... 300000000 \n", "666 667 [{'id': 121938, 'name': 'The Hobbit Collection... 250000000 \n", "961 962 [{'id': 1241, 'name': 'Harry Potter Collection... 250000000 \n", "543 544 [{'id': 119, 'name': 'The Lord of the Rings Co... 79000000 \n", "1735 1736 [{'id': 328, 'name': 'Jurassic Park Collection... 63000000 \n", "2387 2388 [{'id': 8354, 'name': 'Ice Age Collection', 'p... 90000000 \n", "2737 2738 [{'id': 645, 'name': 'James Bond Collection', ... 245000000 \n", "2802 2803 [{'id': 1241, 'name': 'Harry Potter Collection... 100000000 \n", "114 115 [{'id': 119, 'name': 'The Lord of the Rings Co... 93000000 \n", "2126 2127 [{'id': 284433, 'name': 'Guardians of the Gala... 200000000 \n", "689 690 [{'id': 10, 'name': 'Star Wars Collection', 'p... 113000000 \n", "1332 1333 [{'id': 8650, 'name': 'Transformers Collection... 150000000 \n", "518 519 [{'id': 468552, 'name': 'Wonder Woman Collecti... 149000000 \n", "357 358 [{'id': 94032, 'name': 'The Lion King Collecti... 45000000 \n", "902 903 [{'id': 84, 'name': 'Indiana Jones Collection'... 
185000000 \n", "2326 2327 [{'id': 448150, 'name': 'Deadpool Collection',... 58000000 \n", "2018 2019 [{'id': 284433, 'name': 'Guardians of the Gala... 170000000 \n", "941 942 NaN 180000000 \n", "... ... ... ... \n", "1006 1007 NaN 2000000 \n", "2874 2875 NaN 0 \n", "498 499 NaN 0 \n", "2032 2033 NaN 4 \n", "1884 1885 NaN 12000000 \n", "269 270 NaN 0 \n", "150 151 NaN 0 \n", "2090 2091 NaN 10000000 \n", "2399 2400 NaN 0 \n", "2759 2760 NaN 0 \n", "450 451 NaN 23000000 \n", "312 313 NaN 16000000 \n", "1141 1142 NaN 0 \n", "665 666 [{'id': 86668, 'name': 'Elektra Luxx Collectio... 0 \n", "280 281 [{'id': 381396, 'name': 'Bats Collection', 'po... 0 \n", "2251 2252 NaN 130 \n", "639 640 NaN 6 \n", "2582 2583 NaN 0 \n", "1190 1191 NaN 0 \n", "2490 2491 NaN 6400000 \n", "1346 1347 NaN 1 \n", "2117 2118 NaN 344 \n", "334 335 NaN 1 \n", "1541 1542 NaN 750000 \n", "2383 2384 [{'id': 369004, 'name': 'Borsalino Collection'... 0 \n", "695 696 NaN 1 \n", "1917 1918 NaN 592 \n", "1874 1875 NaN 0 \n", "347 348 NaN 12 \n", "1754 1755 NaN 2 \n", "\n", " genres \\\n", "1126 [{'id': 878, 'name': 'Science Fiction'}, {'id'... \n", "1761 [{'id': 28, 'name': 'Action'}] \n", "2770 [{'id': 28, 'name': 'Action'}, {'id': 12, 'nam... \n", "684 [{'id': 10751, 'name': 'Family'}, {'id': 14, '... \n", "2322 [{'id': 28, 'name': 'Action'}, {'id': 878, 'na... \n", "906 [{'id': 28, 'name': 'Action'}, {'id': 80, 'nam... \n", "2135 [{'id': 12, 'name': 'Adventure'}, {'id': 28, '... \n", "2562 [{'id': 12, 'name': 'Adventure'}, {'id': 16, '... \n", "881 [{'id': 10751, 'name': 'Family'}, {'id': 14, '... \n", "734 [{'id': 16, 'name': 'Animation'}, {'id': 12, '... \n", "2532 [{'id': 12, 'name': 'Adventure'}, {'id': 14, '... \n", "1673 [{'id': 18, 'name': 'Drama'}, {'id': 28, 'name... \n", "2209 [{'id': 12, 'name': 'Adventure'}, {'id': 14, '... \n", "666 [{'id': 12, 'name': 'Adventure'}, {'id': 14, '... \n", "961 [{'id': 12, 'name': 'Adventure'}, {'id': 14, '... 
\n", "543 [{'id': 12, 'name': 'Adventure'}, {'id': 14, '... \n", "1735 [{'id': 12, 'name': 'Adventure'}, {'id': 878, ... \n", "2387 [{'id': 16, 'name': 'Animation'}, {'id': 35, '... \n", "2737 [{'id': 28, 'name': 'Action'}, {'id': 12, 'nam... \n", "2802 [{'id': 12, 'name': 'Adventure'}, {'id': 14, '... \n", "114 [{'id': 12, 'name': 'Adventure'}, {'id': 14, '... \n", "2126 [{'id': 28, 'name': 'Action'}, {'id': 12, 'nam... \n", "689 [{'id': 878, 'name': 'Science Fiction'}, {'id'... \n", "1332 [{'id': 878, 'name': 'Science Fiction'}, {'id'... \n", "518 [{'id': 28, 'name': 'Action'}, {'id': 12, 'nam... \n", "357 [{'id': 10751, 'name': 'Family'}, {'id': 16, '... \n", "902 [{'id': 12, 'name': 'Adventure'}, {'id': 28, '... \n", "2326 [{'id': 28, 'name': 'Action'}, {'id': 12, 'nam... \n", "2018 [{'id': 28, 'name': 'Action'}, {'id': 878, 'na... \n", "941 [{'id': 14, 'name': 'Fantasy'}, {'id': 12, 'na... \n", "... ... \n", "1006 [{'id': 53, 'name': 'Thriller'}] \n", "2874 [{'id': 35, 'name': 'Comedy'}, {'id': 10749, '... \n", "498 [{'id': 99, 'name': 'Documentary'}] \n", "2032 [{'id': 18, 'name': 'Drama'}, {'id': 10749, 'n... \n", "1884 [{'id': 9648, 'name': 'Mystery'}, {'id': 53, '... \n", "269 [{'id': 99, 'name': 'Documentary'}, {'id': 107... \n", "150 [{'id': 12, 'name': 'Adventure'}, {'id': 18, '... \n", "2090 [{'id': 80, 'name': 'Crime'}, {'id': 18, 'name... \n", "2399 [{'id': 53, 'name': 'Thriller'}, {'id': 18, 'n... \n", "2759 [{'id': 28, 'name': 'Action'}, {'id': 35, 'nam... \n", "450 [{'id': 35, 'name': 'Comedy'}, {'id': 10749, '... \n", "312 [{'id': 35, 'name': 'Comedy'}, {'id': 18, 'nam... \n", "1141 [{'id': 18, 'name': 'Drama'}, {'id': 10769, 'n... \n", "665 [{'id': 28, 'name': 'Action'}, {'id': 35, 'nam... \n", "280 [{'id': 27, 'name': 'Horror'}, {'id': 53, 'nam... \n", "2251 [{'id': 18, 'name': 'Drama'}, {'id': 28, 'name... \n", "639 [{'id': 18, 'name': 'Drama'}, {'id': 10749, 'n... \n", "2582 [{'id': 28, 'name': 'Action'}, {'id': 35, 'nam... 
\n", "1190 [{'id': 16, 'name': 'Animation'}, {'id': 10751... \n", "2490 [{'id': 53, 'name': 'Thriller'}, {'id': 10749,... \n", "1346 [{'id': 18, 'name': 'Drama'}] \n", "2117 [{'id': 35, 'name': 'Comedy'}, {'id': 10749, '... \n", "334 [{'id': 28, 'name': 'Action'}, {'id': 18, 'nam... \n", "1541 [{'id': 80, 'name': 'Crime'}, {'id': 35, 'name... \n", "2383 [{'id': 80, 'name': 'Crime'}, {'id': 18, 'name... \n", "695 [{'id': 10749, 'name': 'Romance'}, {'id': 53, ... \n", "1917 [{'id': 10749, 'name': 'Romance'}, {'id': 18, ... \n", "1874 [{'id': 80, 'name': 'Crime'}, {'id': 18, 'name... \n", "347 [{'id': 14, 'name': 'Fantasy'}, {'id': 18, 'na... \n", "1754 [{'id': 53, 'name': 'Thriller'}, {'id': 10769,... \n", "\n", " homepage imdb_id \\\n", "1126 http://marvel.com/avengers_movie/ tt0848228 \n", "1761 http://www.furious7.com/ tt2820852 \n", "2770 http://marvel.com/movies/movie/193/avengers_ag... tt2395427 \n", "684 http://movies.disney.com/beauty-and-the-beast-... tt2771200 \n", "2322 http://www.transformersmovie.com/ tt1399103 \n", "906 http://www.thedarkknightrises.com/ tt1345836 \n", "2135 http://disney.go.com/pirates/index-on-stranger... tt1298650 \n", "2562 http://movies.disney.com/finding-dory tt2277860 \n", "881 http://disney.go.com/wonderland/ tt1014759 \n", "734 http://movies.disney.com/zootopia tt2948356 \n", "2532 http://www.thehobbit.com/ tt0903624 \n", "1673 http://thedarkknight.warnerbros.com/dvdsite/ tt0468569 \n", "2209 http://disney.go.com/disneypictures/pirates/ tt0449088 \n", "666 http://www.thehobbit.com/ tt1170358 \n", "961 http://harrypotter.warnerbros.com/harrypottera... tt0417741 \n", "543 http://www.lordoftherings.net/ tt0167261 \n", "1735 http://www.jurassicpark.com/ tt0107290 \n", "2387 http://www.iceagemovies.com/films/ice-age-dawn... tt1080016 \n", "2737 http://www.sonypictures.com/movies/spectre/ tt2379713 \n", "2802 NaN tt0295297 \n", "114 http://www.lordoftherings.net/ tt0120737 \n", "2126 http://marvel.com/movies/movie/221/guardians_o... 
tt3896198 \n", "689 http://www.starwars.com/films/star-wars-episod... tt0121766 \n", "1332 http://www.transformersmovie.com/ tt1055369 \n", "518 http://www.warnerbros.com/wonder-woman tt0451279 \n", "357 http://movies.disney.com/the-lion-king tt0110357 \n", "902 http://www.indianajones.com/site/index.html tt0367882 \n", "2326 http://www.foxmovies.com/movies/deadpool tt1431045 \n", "2018 http://marvel.com/guardians tt2015381 \n", "941 http://movies.disney.com/maleficent tt1587310 \n", "... ... ... \n", "1006 NaN tt0429277 \n", "2874 http://www.edwardburns.net/ tt1619037 \n", "498 NaN tt0841119 \n", "2032 NaN tt0022879 \n", "1884 NaN tt0199626 \n", "269 http://www.glassthemovie.com/Home.html tt1092004 \n", "150 NaN tt0081760 \n", "2090 NaN tt0106684 \n", "2399 http://www.kaiji-movie.jp tt1904937 \n", "2759 NaN tt0112909 \n", "450 NaN tt0360139 \n", "312 NaN tt0380277 \n", "1141 NaN tt1922561 \n", "665 http://www.elektraluxx-movie.com/ tt1340773 \n", "280 NaN tt0200469 \n", "2251 NaN tt1729637 \n", "639 NaN tt0183659 \n", "2582 NaN tt0339230 \n", "1190 NaN tt0089984 \n", "2490 NaN tt0113965 \n", "1346 NaN tt0048028 \n", "2117 http://www.americanadobo.com tt0294289 \n", "334 NaN tt0376144 \n", "1541 NaN tt3805180 \n", "2383 NaN tt0065486 \n", "695 NaN tt0374271 \n", "1917 NaN tt0016104 \n", "1874 http://www.vt4.be/vermist tt1133617 \n", "347 NaN tt0118172 \n", "1754 NaN tt0110604 \n", "\n", " original_language original_title \\\n", "1126 en The Avengers \n", "1761 en Furious 7 \n", "2770 en Avengers: Age of Ultron \n", "684 en Beauty and the Beast \n", "2322 en Transformers: Dark of the Moon \n", "906 en The Dark Knight Rises \n", "2135 en Pirates of the Caribbean: On Stranger Tides \n", "2562 en Finding Dory \n", "881 en Alice in Wonderland \n", "734 en Zootopia \n", "2532 en The Hobbit: An Unexpected Journey \n", "1673 en The Dark Knight \n", "2209 en Pirates of the Caribbean: At World's End \n", "666 en The Hobbit: The Desolation of Smaug \n", "961 en Harry Potter 
and the Half-Blood Prince \n", "543 en The Lord of the Rings: The Two Towers \n", "1735 en Jurassic Park \n", "2387 en Ice Age: Dawn of the Dinosaurs \n", "2737 en Spectre \n", "2802 en Harry Potter and the Chamber of Secrets \n", "114 en The Lord of the Rings: The Fellowship of the Ring \n", "2126 en Guardians of the Galaxy Vol. 2 \n", "689 en Star Wars: Episode III - Revenge of the Sith \n", "1332 en Transformers: Revenge of the Fallen \n", "518 en Wonder Woman \n", "357 en The Lion King \n", "902 en Indiana Jones and the Kingdom of the Crystal S... \n", "2326 en Deadpool \n", "2018 en Guardians of the Galaxy \n", "941 en Maleficent \n", "... ... ... \n", "1006 en Zyzzyx Road \n", "2874 en Nice Guy Johnny \n", "498 en Lake of Fire \n", "2032 en A Farewell to Arms \n", "1884 en In the Cut \n", "269 en Glass: A Portrait of Philip in Twelve Parts \n", "150 en Windwalker \n", "2090 en Deadfall \n", "2399 ja カイジ2 人生奪回ゲーム \n", "2759 en Dou Sing 2 - Gai Tau Dou Sing \n", "450 en Chasing Liberty \n", "312 en The Cookout \n", "1141 ko 북촌방향 \n", "665 en Elektra Luxx \n", "280 en Bats \n", "2251 hi Bodyguard \n", "639 en Pollock \n", "2582 sv Kopps \n", "1190 en He-Man and She-Ra: The Secret of the Sword \n", "2490 en Never Talk to Strangers \n", "1346 en East of Eden \n", "2117 en American Adobo \n", "334 ta Saamy \n", "1541 ru Все и сразу \n", "2383 fr Borsalino \n", "695 hi Tere Naam \n", "1917 en The Merry Widow \n", "1874 en Vermist \n", "347 en The Wind in the Willows \n", "1754 en Mute Witness \n", "\n", " overview popularity \\\n", "1126 When an unexpected enemy emerges and threatens... 89.887648 \n", "1761 Deckard Shaw seeks revenge against Dominic Tor... 27.275687 \n", "2770 When Tony Stark tries to jumpstart a dormant p... 37.379420 \n", "684 A live-action adaptation of Disney's version o... 287.253654 \n", "2322 Sam Witwicky takes his first tenuous steps int... 4.503505 \n", "906 Following the death of District Attorney Harve... 
20.582580 \n", "2135 Captain Jack Sparrow crosses paths with a woma... 27.887720 \n", "2562 Dory is reunited with her friends Nemo and Mar... 14.477677 \n", "881 Alice, an unpretentious and individual 19-year... 17.285093 \n", "734 Determined to prove herself, Officer Judy Hopp... 26.024868 \n", "2532 Bilbo Baggins, a hobbit enjoying his quiet lif... 23.253089 \n", "1673 Batman raises the stakes in his war on crime. ... 123.167259 \n", "2209 Captain Barbossa, long believed to be dead, ha... 31.363664 \n", "666 The Dwarves, Bilbo and Gandalf have successful... 20.644776 \n", "961 As Harry begins his sixth year at Hogwarts, he... 19.083723 \n", "543 Frodo and Sam are trekking to Mordor to destro... 29.423537 \n", "1735 A wealthy entrepreneur secretly creates a them... 8.863776 \n", "2387 Times are changing for Manny the moody mammoth... 12.980624 \n", "2737 A cryptic message from Bond‚Äôs past sends him... 24.926577 \n", "2802 Ignoring threats to his life, Harry returns to... 29.741452 \n", "114 Young hobbit Frodo Baggins, after inheriting a... 32.070725 \n", "2126 The Guardians must fight to keep their newfoun... 185.330992 \n", "689 Years after the onset of the Clone Wars, the n... 13.165421 \n", "1332 Sam Witwicky leaves the Autobots behind for a ... 4.351756 \n", "518 An Amazon princess comes to the world of Man t... 294.337037 \n", "357 A young lion cub named Simba can't wait to be ... 21.605761 \n", "902 Set during the Cold War, the Soviets ‚Äì led b... 12.577266 \n", "2326 Deadpool tells the origin story of former Spec... 187.860492 \n", "2018 Light years from Earth, 26 years after being a... 53.291601 \n", "941 The untold story of Disney's most iconic villa... 19.467404 \n", "... ... ... \n", "1006 A married man (Leo Grillo) meets a beautiful w... 0.717595 \n", "2874 Johnny Rizzo, is about to trade his dream job ... 2.166081 \n", "498 An unflinching look at the how the battle over... 0.915050 \n", "2032 British nurse Catherine Barkley (Helen Hayes) ... 
1.914697 \n", "1884 Following the gruesome murder of a young woman... 5.799628 \n", "269 Academy Award®-nominated director Scott Hicks ... 0.498877 \n", "150 An ancient Indian warrior who has reached the ... 1.178642 \n", "2090 After he accidentally kills his father, Mike, ... 1.145806 \n", "2399 3 years after the ultimate life-or-death game ... 1.133760 \n", "2759 Lui is the tyrant of gamblers in Macau. He put... 0.084720 \n", "450 The President's daughter, unable to experience... 5.950792 \n", "312 When Todd Anderson signs a $30 million deal wi... 1.758079 \n", "1141 Sang-Joon is a professor in the film departmen... 1.042432 \n", "665 A favor for a woman from her past throws the l... 4.118699 \n", "280 Genetically mutated bats escape and it's up to... 2.584025 \n", "2251 Lovely Singh (Salman Khan) is the bodyguard of... 2.402032 \n", "639 In August of 1949, Life Magazine ran a banner ... 11.179864 \n", "2582 A small Swedish village, H√∂gbotr√§sk, is so p... 2.939129 \n", "1190 After experiencing traumatic nightmares of a t... 2.087446 \n", "2490 Sarah Taylor, a police psychologist, meets a m... 7.506958 \n", "1346 In the Salinas Valley, in and around World War... 6.428830 \n", "2117 The everyday struggles of people trying to bri... 0.158651 \n", "334 Saamy movie is all about Arusaamy (Vikram) who... 0.438490 \n", "1541 NaN 0.201582 \n", "2383 In 1930 Marseilles two small-time crooks join ... 4.471645 \n", "695 Lower Caste Radhey Mohan is a no good slacker,... 1.280455 \n", "1917 Prince Danilo falls in love with dancer Sally ... 0.286719 \n", "1874 Five members of the Missing Persons Task Force... 0.229233 \n", "347 Jailed for his reckless driving, rambunctious ... 1.410322 \n", "1754 Billy is mute, but it hasn't kept her from bec... 2.466066 \n", "\n", " ... release_date runtime \\\n", "1126 ... 4/25/12 143.0 \n", "1761 ... 4/1/15 137.0 \n", "2770 ... 4/22/15 141.0 \n", "684 ... 3/16/17 129.0 \n", "2322 ... 6/28/11 154.0 \n", "906 ... 7/16/12 165.0 \n", "2135 ... 
5/14/11 136.0 \n", "2562 ... 6/16/16 97.0 \n", "881 ... 3/3/10 108.0 \n", "734 ... 2/11/16 108.0 \n", "2532 ... 11/26/12 169.0 \n", "1673 ... 7/16/08 152.0 \n", "2209 ... 5/19/07 169.0 \n", "666 ... 12/11/13 161.0 \n", "961 ... 7/7/09 153.0 \n", "543 ... 12/18/02 179.0 \n", "1735 ... 6/11/93 127.0 \n", "2387 ... 6/29/09 94.0 \n", "2737 ... 10/26/15 148.0 \n", "2802 ... 11/13/02 161.0 \n", "114 ... 12/18/01 178.0 \n", "2126 ... 4/19/17 137.0 \n", "689 ... 5/17/05 140.0 \n", "1332 ... 6/19/09 150.0 \n", "518 ... 5/30/17 141.0 \n", "357 ... 6/23/94 89.0 \n", "902 ... 5/21/08 122.0 \n", "2326 ... 2/9/16 108.0 \n", "2018 ... 7/30/14 121.0 \n", "941 ... 5/28/14 97.0 \n", "... ... ... ... \n", "1006 ... 2/25/06 90.0 \n", "2874 ... 11/4/10 89.0 \n", "498 ... 9/9/06 152.0 \n", "2032 ... 12/8/32 89.0 \n", "1884 ... 9/9/03 119.0 \n", "269 ... 9/7/07 119.0 \n", "150 ... 12/12/80 108.0 \n", "2090 ... 10/8/93 98.0 \n", "2399 ... 11/5/11 133.0 \n", "2759 ... 6/28/95 98.0 \n", "450 ... 1/9/04 111.0 \n", "312 ... 9/3/04 97.0 \n", "1141 ... 9/8/11 79.0 \n", "665 ... 3/14/10 98.0 \n", "280 ... 10/22/99 91.0 \n", "2251 ... 8/31/11 130.0 \n", "639 ... 9/6/00 122.0 \n", "2582 ... 2/6/03 90.0 \n", "1190 ... 1/1/85 100.0 \n", "2490 ... 10/20/95 86.0 \n", "1346 ... 3/9/55 115.0 \n", "2117 ... 9/29/01 104.0 \n", "334 ... 5/5/03 140.0 \n", "1541 ... 6/5/14 0.0 \n", "2383 ... 5/19/70 126.0 \n", "695 ... 8/15/03 132.0 \n", "1917 ... 8/26/25 137.0 \n", "1874 ... 1/1/07 0.0 \n", "347 ... 10/16/96 84.0 \n", "1754 ... 9/28/95 95.0 \n", "\n", " spoken_languages status \\\n", "1126 [{'iso_639_1': 'en', 'name': 'English'}] Released \n", "1761 [{'iso_639_1': 'en', 'name': 'English'}] Released \n", "2770 [{'iso_639_1': 'en', 'name': 'English'}] Released \n", "684 [{'iso_639_1': 'en', 'name': 'English'}] Released \n", "2322 [{'iso_639_1': 'en', 'name': 'English'}] Released \n", "906 [{'iso_639_1': 'en', 'name': 'English'}] Released \n", "2135 [{'iso_639_1': 'en', 'name': 'English'}, {'iso... 
Released \n", "2562 [{'iso_639_1': 'en', 'name': 'English'}] Released \n", "881 [{'iso_639_1': 'en', 'name': 'English'}] Released \n", "734 [{'iso_639_1': 'en', 'name': 'English'}] Released \n", "2532 [{'iso_639_1': 'en', 'name': 'English'}] Released \n", "1673 [{'iso_639_1': 'en', 'name': 'English'}, {'iso... Released \n", "2209 [{'iso_639_1': 'en', 'name': 'English'}] Released \n", "666 [{'iso_639_1': 'en', 'name': 'English'}] Released \n", "961 [{'iso_639_1': 'en', 'name': 'English'}] Released \n", "543 [{'iso_639_1': 'en', 'name': 'English'}] Released \n", "1735 [{'iso_639_1': 'en', 'name': 'English'}, {'iso... Released \n", "2387 [{'iso_639_1': 'en', 'name': 'English'}] Released \n", "2737 [{'iso_639_1': 'fr', 'name': 'Français'}, {'is... Released \n", "2802 [{'iso_639_1': 'en', 'name': 'English'}] Released \n", "114 [{'iso_639_1': 'en', 'name': 'English'}] Released \n", "2126 [{'iso_639_1': 'en', 'name': 'English'}] Released \n", "689 [{'iso_639_1': 'en', 'name': 'English'}] Released \n", "1332 [{'iso_639_1': 'en', 'name': 'English'}, {'iso... Released \n", "518 [{'iso_639_1': 'de', 'name': 'Deutsch'}, {'iso... Released \n", "357 [{'iso_639_1': 'en', 'name': 'English'}] Released \n", "902 [{'iso_639_1': 'en', 'name': 'English'}, {'iso... Released \n", "2326 [{'iso_639_1': 'en', 'name': 'English'}] Released \n", "2018 [{'iso_639_1': 'en', 'name': 'English'}] Released \n", "941 [{'iso_639_1': 'en', 'name': 'English'}] Released \n", "... ... ... \n", "1006 [{'iso_639_1': 'en', 'name': 'English'}] Released \n", "2874 [{'iso_639_1': 'en', 'name': 'English'}] Released \n", "498 [{'iso_639_1': 'en', 'name': 'English'}] Released \n", "2032 [{'iso_639_1': 'en', 'name': 'English'}] Released \n", "1884 [{'iso_639_1': 'en', 'name': 'English'}] Released \n", "269 [{'iso_639_1': 'en', 'name': 'English'}] Released \n", "150 NaN Released \n", "2090 [{'iso_639_1': 'en', 'name': 'English'}, {'iso... 
Released \n", "2399 [{'iso_639_1': 'ja', 'name': '日本語'}] Released \n", "2759 [{'iso_639_1': 'cn', 'name': '广州话 / 廣州話'}] Released \n", "450 [{'iso_639_1': 'en', 'name': 'English'}, {'iso... Released \n", "312 [{'iso_639_1': 'en', 'name': 'English'}] Released \n", "1141 [{'iso_639_1': 'ko', 'name': '한국어/조선말'}] Released \n", "665 [{'iso_639_1': 'en', 'name': 'English'}] Released \n", "280 [{'iso_639_1': 'en', 'name': 'English'}] Released \n", "2251 [{'iso_639_1': 'hi', 'name': 'हिन्दी'}] Released \n", "639 [{'iso_639_1': 'en', 'name': 'English'}] Released \n", "2582 [{'iso_639_1': 'en', 'name': 'English'}, {'iso... Released \n", "1190 [{'iso_639_1': 'en', 'name': 'English'}, {'iso... Released \n", "2490 [{'iso_639_1': 'en', 'name': 'English'}] Released \n", "1346 [{'iso_639_1': 'en', 'name': 'English'}] Released \n", "2117 [{'iso_639_1': 'en', 'name': 'English'}, {'iso... Released \n", "334 [{'iso_639_1': 'ta', 'name': 'தமிழ்'}] Released \n", "1541 [{'iso_639_1': 'ru', 'name': 'Pусский'}] Released \n", "2383 [{'iso_639_1': 'fr', 'name': 'Français'}, {'is... Released \n", "695 [{'iso_639_1': 'hi', 'name': 'हिन्दी'}] Released \n", "1917 NaN Released \n", "1874 [{'iso_639_1': 'nl', 'name': 'Nederlands'}] Released \n", "347 [{'iso_639_1': 'en', 'name': 'English'}, {'iso... Released \n", "1754 [{'iso_639_1': 'ru', 'name': 'Pусский'}, {'iso... Released \n", "\n", " tagline \\\n", "1126 Some assembly required. \n", "1761 Vengeance Hits Home \n", "2770 A New Age Has Come. \n", "684 Be our guest. \n", "2322 The invasion we always feared. An enemy we nev... \n", "906 The Legend Ends \n", "2135 Live Forever Or Die Trying. \n", "2562 An unforgettable journey she probably won't re... \n", "881 You're invited to a very important date. \n", "734 Welcome to the urban jungle. \n", "2532 From the smallest beginnings come the greatest... \n", "1673 Why So Serious? \n", "2209 At the end of the world, the adventure begins. \n", "666 Beyond darkness... beyond desolation... lies t... 
\n", "961 Dark Secrets Revealed \n", "543 A New Power Is Rising. \n", "1735 An adventure 65 million years in the making. \n", "2387 You Won't Believe Your Ice! \n", "2737 A Plan No One Escapes \n", "2802 Hogwarts is back in session. \n", "114 One ring to rule them all \n", "2126 Obviously. \n", "689 The saga is complete. \n", "1332 Revenge is coming. \n", "518 Power. Grace. Wisdom. Wonder. \n", "357 Life's greatest adventure is finding your plac... \n", "902 The adventure continues . . . \n", "2326 Witness the beginning of a happy ending \n", "2018 All heroes start somewhere. \n", "941 Don't believe the fairy tale. \n", "... ... \n", "1006 Dead Ahead \n", "2874 NaN \n", "498 Exploring the Issue that Divides the World \n", "2032 Every woman who has loved will understand \n", "1884 Everything you know about desire is dead wrong. \n", "269 In July 2005, filmmaker Scott Hicks started sh... \n", "150 He conquered love and death...now he walks the... \n", "2090 ...The ultimate con \n", "2399 NaN \n", "2759 NaN \n", "450 How do you fall in love with the whole world w... \n", "312 This summer, get your grill on! \n", "1141 NaN \n", "665 From adult film star to functioning adult. \n", "280 They will suck you dry \n", "2251 NaN \n", "639 A true portrait of life and art. \n", "2582 NaN \n", "1190 NaN \n", "2490 In A World Where Love Isn't Always Safe, Trust... \n", "1346 Of what a girl did . . . what a boy did ... of... \n", "2117 Sex is the appetizer. Love is the main course.... \n", "334 NaN \n", "1541 NaN \n", "2383 NaN \n", "695 Unfortunately a true love story \n", "1917 NaN \n", "1874 NaN \n", "347 Go wild in the country! \n", "1754 She Can't Speak. She Can't Scream. She Can't B... 
\n", "\n", " title \\\n", "1126 The Avengers \n", "1761 Furious 7 \n", "2770 Avengers: Age of Ultron \n", "684 Beauty and the Beast \n", "2322 Transformers: Dark of the Moon \n", "906 The Dark Knight Rises \n", "2135 Pirates of the Caribbean: On Stranger Tides \n", "2562 Finding Dory \n", "881 Alice in Wonderland \n", "734 Zootopia \n", "2532 The Hobbit: An Unexpected Journey \n", "1673 The Dark Knight \n", "2209 Pirates of the Caribbean: At World's End \n", "666 The Hobbit: The Desolation of Smaug \n", "961 Harry Potter and the Half-Blood Prince \n", "543 The Lord of the Rings: The Two Towers \n", "1735 Jurassic Park \n", "2387 Ice Age: Dawn of the Dinosaurs \n", "2737 Spectre \n", "2802 Harry Potter and the Chamber of Secrets \n", "114 The Lord of the Rings: The Fellowship of the Ring \n", "2126 Guardians of the Galaxy Vol. 2 \n", "689 Star Wars: Episode III - Revenge of the Sith \n", "1332 Transformers: Revenge of the Fallen \n", "518 Wonder Woman \n", "357 The Lion King \n", "902 Indiana Jones and the Kingdom of the Crystal S... \n", "2326 Deadpool \n", "2018 Guardians of the Galaxy \n", "941 Maleficent \n", "... ... 
\n", "1006 Zyzzyx Road \n", "2874 Nice Guy Johnny \n", "498 Lake of Fire \n", "2032 A Farewell to Arms \n", "1884 In the Cut \n", "269 Glass: A Portrait of Philip in Twelve Parts \n", "150 Windwalker \n", "2090 Deadfall \n", "2399 Kaiji 2: The Ultimate Gambler \n", "2759 The Saint of Gamblers \n", "450 Chasing Liberty \n", "312 The Cookout \n", "1141 The Day He Arrives \n", "665 Elektra Luxx \n", "280 Bats \n", "2251 Bodyguard \n", "639 Pollock \n", "2582 Kops \n", "1190 He-Man and She-Ra: The Secret of the Sword \n", "2490 Never Talk to Strangers \n", "1346 East of Eden \n", "2117 American Adobo \n", "334 Saamy \n", "1541 All at Once \n", "2383 Borsalino \n", "695 Tere Naam \n", "1917 The Merry Widow \n", "1874 Missing \n", "347 The Wind in the Willows \n", "1754 Mute Witness \n", "\n", " Keywords \\\n", "1126 [{'id': 242, 'name': 'new york'}, {'id': 5539,... \n", "1761 [{'id': 830, 'name': 'car race'}, {'id': 3428,... \n", "2770 [{'id': 8828, 'name': 'marvel comic'}, {'id': ... \n", "684 [{'id': 254, 'name': 'france'}, {'id': 2343, '... \n", "2322 [{'id': 305, 'name': 'moon'}, {'id': 1612, 'na... \n", "906 [{'id': 849, 'name': 'dc comics'}, {'id': 853,... \n", "2135 [{'id': 658, 'name': 'sea'}, {'id': 1316, 'nam... \n", "2562 [{'id': 1357, 'name': 'fish'}, {'id': 1453, 'n... \n", "881 [{'id': 818, 'name': 'based on novel'}, {'id':... \n", "734 [{'id': 2527, 'name': 'fox'}, {'id': 10360, 'n... \n", "2532 [{'id': 483, 'name': 'riddle'}, {'id': 603, 'n... \n", "1673 [{'id': 849, 'name': 'dc comics'}, {'id': 853,... \n", "2209 [{'id': 270, 'name': 'ocean'}, {'id': 726, 'na... \n", "666 [{'id': 603, 'name': 'elves'}, {'id': 604, 'na... \n", "961 [{'id': 616, 'name': 'witch'}, {'id': 2343, 'n... \n", "543 [{'id': 603, 'name': 'elves'}, {'id': 606, 'na... \n", "1735 [{'id': 911, 'name': 'exotic island'}, {'id': ... \n", "2387 [{'id': 2219, 'name': 'ice age'}, {'id': 3450,... \n", "2737 [{'id': 470, 'name': 'spy'}, {'id': 818, 'name... 
\n", "2802 [{'id': 391, 'name': 'flying car'}, {'id': 616... \n", "114 [{'id': 603, 'name': 'elves'}, {'id': 604, 'na... \n", "2126 [{'id': 9663, 'name': 'sequel'}, {'id': 9715, ... \n", "689 [{'id': 797, 'name': 'showdown'}, {'id': 10013... \n", "1332 [{'id': 1160, 'name': 'egypt'}, {'id': 1670, '... \n", "518 [{'id': 849, 'name': 'dc comics'}, {'id': 1701... \n", "357 [{'id': 2143, 'name': 'loss of parents'}, {'id... \n", "902 [{'id': 83, 'name': 'saving the world'}, {'id'... \n", "2326 [{'id': 2095, 'name': 'anti hero'}, {'id': 307... \n", "2018 [{'id': 8828, 'name': 'marvel comic'}, {'id': ... \n", "941 [{'id': 3205, 'name': 'fairy tale'}, {'id': 32... \n", "... ... \n", "1006 [{'id': 9937, 'name': 'suspense'}] \n", "2874 [{'id': 966, 'name': 'beach'}, {'id': 2848, 'n... \n", "498 NaN \n", "2032 [{'id': 131, 'name': 'italy'}, {'id': 428, 'na... \n", "1884 [{'id': 1664, 'name': 'eroticism'}, {'id': 993... \n", "269 NaN \n", "150 [{'id': 1262, 'name': 'mountain'}, {'id': 1930... \n", "2090 [{'id': 9826, 'name': 'murder'}, {'id': 15015,... \n", "2399 NaN \n", "2759 [{'id': 395, 'name': 'gambling'}] \n", "450 [{'id': 110, 'name': 'venice'}, {'id': 220, 'n... \n", "312 [{'id': 10267, 'name': 'comedy'}] \n", "1141 NaN \n", "665 [{'id': 10183, 'name': 'independent film'}, {'... \n", "280 [{'id': 2766, 'name': 'mutation'}, {'id': 5155... \n", "2251 [{'id': 2251, 'name': 'lie'}, {'id': 2913, 'na... \n", "639 [{'id': 2679, 'name': 'artist'}, {'id': 3836, ... \n", "2582 [{'id': 5543, 'name': 'cow'}, {'id': 7490, 'na... \n", "1190 [{'id': 1400, 'name': 'swordplay'}, {'id': 193... \n", "2490 [{'id': 6103, 'name': 'telephone'}, {'id': 614... \n", "1346 [{'id': 30, 'name': 'individual'}, {'id': 380,... \n", "2117 [{'id': 10183, 'name': 'independent film'}, {'... \n", "334 NaN \n", "1541 [{'id': 642, 'name': 'robbery'}, {'id': 231149... \n", "2383 NaN \n", "695 NaN \n", "1917 [{'id': 1691, 'name': 'dance'}, {'id': 10181, ... 
\n", "1874 NaN \n", "347 NaN \n", "1754 [{'id': 3713, 'name': 'chase'}, {'id': 6149, '... \n", "\n", " cast \\\n", "1126 [{'cast_id': 46, 'character': 'Tony Stark / Ir... \n", "1761 [{'cast_id': 17, 'character': 'Dominic Toretto... \n", "2770 [{'cast_id': 76, 'character': 'Tony Stark / Ir... \n", "684 [{'cast_id': 174, 'character': 'Belle', 'credi... \n", "2322 [{'cast_id': 3, 'character': 'Sam Witwicky', '... \n", "906 NaN \n", "2135 [{'cast_id': 15, 'character': 'Captain Jack Sp... \n", "2562 [{'cast_id': 2, 'character': 'Dory (voice)', '... \n", "881 [{'cast_id': 7, 'character': 'Alice Kingsleigh... \n", "734 [{'cast_id': 23, 'character': 'Judy Hopps (voi... \n", "2532 [{'cast_id': 6, 'character': 'Gandalf', 'credi... \n", "1673 [{'cast_id': 35, 'character': 'Bruce Wayne / B... \n", "2209 [{'cast_id': 4, 'character': 'Captain Jack Spa... \n", "666 [{'cast_id': 3, 'character': 'Bilbo Baggins', ... \n", "961 [{'cast_id': 3, 'character': 'Harry Potter', '... \n", "543 [{'cast_id': 13, 'character': 'Frodo Baggins',... \n", "1735 [{'cast_id': 4, 'character': 'Dr. Alan Grant',... \n", "2387 [{'cast_id': 2, 'character': 'Manny (voice)', ... \n", "2737 [{'cast_id': 1, 'character': 'James Bond', 'cr... \n", "2802 [{'cast_id': 23, 'character': 'Harry Potter', ... \n", "114 [{'cast_id': 28, 'character': 'Frodo Baggins',... \n", "2126 [{'cast_id': 3, 'character': 'Peter Quill / St... \n", "689 [{'cast_id': 13, 'character': 'Obi-Wan Kenobi'... \n", "1332 [{'cast_id': 5, 'character': 'Sam Witwicky', '... \n", "518 NaN \n", "357 [{'cast_id': 8, 'character': 'Young Simba (voi... \n", "902 [{'cast_id': 4, 'character': 'Indiana Jones', ... \n", "2326 [{'cast_id': 99, 'character': 'Wade Wilson / D... \n", "2018 [{'cast_id': 1, 'character': 'Peter Quill / St... \n", "941 [{'cast_id': 7, 'character': 'Maleficent', 'cr... \n", "... ... \n", "1006 [{'cast_id': 1, 'character': 'Marissa', 'credi... \n", "2874 [{'cast_id': 3, 'character': 'Johnny Rizzo', '... 
\n", "498 [{'cast_id': 3, 'character': 'Himself', 'credi... \n", "2032 [{'cast_id': 1, 'character': 'Catherine Barkle... \n", "1884 [{'cast_id': 1, 'character': 'Frannie', 'credi... \n", "269 [{'cast_id': 2, 'character': 'Himself', 'credi... \n", "150 [{'cast_id': 5, 'character': 'Windwalker', 'cr... \n", "2090 [{'cast_id': 3, 'character': 'Joe Donan', 'cre... \n", "2399 [{'cast_id': 1, 'character': 'Kaiji Ito', 'cre... \n", "2759 [{'cast_id': 3, 'character': 'God Bless You', ... \n", "450 [{'cast_id': 1, 'character': 'Anna Foster', 'c... \n", "312 [{'cast_id': 1, 'character': 'blin bling', 'cr... \n", "1141 [{'cast_id': 3, 'character': 'Sungjoon', 'cred... \n", "665 [{'cast_id': 3, 'character': 'Elektra Luxx', '... \n", "280 [{'cast_id': 1, 'character': 'Sheriff Emmett K... \n", "2251 [{'cast_id': 1, 'character': 'Lovely B. Singh'... \n", "639 [{'cast_id': 1, 'character': 'Jackson Pollock'... \n", "2582 [{'cast_id': 1, 'character': 'Jacob', 'credit_... \n", "1190 [{'cast_id': 13, 'character': 'Prince Adam / H... \n", "2490 [{'cast_id': 1, 'character': 'Dr. Sarah Taylor... \n", "1346 [{'cast_id': 4, 'character': 'Caleb Trask', 'c... \n", "2117 [{'cast_id': 1, 'character': '', 'credit_id': ... \n", "334 [{'cast_id': 11, 'character': 'Aaruchamy', 'cr... \n", "1541 [{'cast_id': 3, 'character': 'Viktor', 'credit... \n", "2383 [{'cast_id': 1, 'character': 'Fran√ßois Capell... \n", "695 [{'cast_id': 3, 'character': 'Radhe Mohan', 'c... \n", "1917 [{'cast_id': 2, 'character': \"Sally O'Hara\", '... \n", "1874 [{'cast_id': 2, 'character': 'Murat Hoxha', 'c... \n", "347 [{'cast_id': 2, 'character': 'Toad', 'credit_i... \n", "1754 [{'cast_id': 22, 'character': 'Billy Hughes', ... \n", "\n", " crew revenue \n", "1126 [{'credit_id': '52fe4495c3a368484e02b1cf', 'de... 1519557910 \n", "1761 [{'credit_id': '52fe4cc8c3a36847f823e681', 'de... 1506249360 \n", "2770 [{'credit_id': '55d5f7d4c3a3683e7e0016eb', 'de... 
1405403694 \n", "684 [{'credit_id': '551879bec3a3681f840004eb', 'de... 1262886337 \n", "2322 [{'credit_id': '537f0b740e0a2624b40044d0', 'de... 1123746996 \n", "906 NaN 1084939099 \n", "2135 [{'credit_id': '566b4f54c3a3683f56005151', 'de... 1045713802 \n", "2562 [{'credit_id': '55eb2fbb92514106d60041ab', 'de... 1028570889 \n", "881 [{'credit_id': '52fe44c09251416c7503fbc3', 'de... 1025491110 \n", "734 [{'credit_id': '536e09650e0a2647cb00fe6b', 'de... 1023784195 \n", "2532 [{'credit_id': '52fe4783c3a36847f8139f7f', 'de... 1021103568 \n", "1673 [{'credit_id': '55a0eb4a925141296b0010f8', 'de... 1004558444 \n", "2209 [{'credit_id': '52fe4232c3a36847f800b579', 'de... 961000000 \n", "666 [{'credit_id': '5350e7b0c3a3681d93000e5d', 'de... 958400000 \n", "961 [{'credit_id': '52fe4273c3a36847f801fab1', 'de... 933959197 \n", "543 [{'credit_id': '52fe421ac3a36847f800454f', 'de... 926287400 \n", "1735 [{'credit_id': '52fe4238c3a36847f800d291', 'de... 920100000 \n", "2387 [{'credit_id': '52fe44a2c3a36847f80a13a1', 'de... 886686817 \n", "2737 [{'credit_id': '5751eed59251416b60000637', 'de... 880674609 \n", "2802 [{'credit_id': '52fe4267c3a36847f801bf67', 'de... 876688482 \n", "114 [{'credit_id': '52fe421ac3a36847f80043ef', 'de... 871368364 \n", "2126 [{'credit_id': '59171547925141583c0315a6', 'de... 863416141 \n", "689 [{'credit_id': '52fe431fc3a36847f803bea3', 'de... 850000000 \n", "1332 [{'credit_id': '537f0791c3a3680598002d4a', 'de... 836297228 \n", "518 NaN 820580447 \n", "357 [{'credit_id': '59161a6bc3a36842690252b9', 'de... 788241776 \n", "902 [{'credit_id': '52fe4227c3a36847f800847d', 'de... 786636033 \n", "2326 [{'credit_id': '57e564fac3a3681beb0000cc', 'de... 783112979 \n", "2018 [{'credit_id': '538ce329c3a3687155003358', 'de... 773328629 \n", "941 [{'credit_id': '52fe4a0cc3a36847f81b5c9f', 'de... 758539785 \n", "... ... ... \n", "1006 [{'credit_id': '52fe45c6c3a36847f80d9bd3', 'de... 30 \n", "2874 [{'credit_id': '52fe466cc3a36847f80fe3c1', 'de... 
25 \n", "498 [{'credit_id': '52fe4681c3a36847f8102b29', 'de... 25 \n", "2032 [{'credit_id': '56d73d3e9251414291002436', 'de... 25 \n", "1884 [{'credit_id': '5628abae9251414cd8001453', 'de... 23 \n", "269 [{'credit_id': '52fe472e9251416c9106caeb', 'de... 20 \n", "150 [{'credit_id': '52fe494b9251416c910a8add', 'de... 18 \n", "2090 [{'credit_id': '52fe45449251416c9102c71f', 'de... 18 \n", "2399 [{'credit_id': '52fe4a68c3a36847f81cc20f', 'de... 15 \n", "2759 [{'credit_id': '52fe48bf9251416c750b1f75', 'de... 13 \n", "450 [{'credit_id': '563c813992514150af00414f', 'de... 12 \n", "312 [{'credit_id': '52fe477f9251416c7509b8ed', 'de... 12 \n", "1141 [{'credit_id': '52fe49499251416c910a86f1', 'de... 11 \n", "665 [{'credit_id': '52fe48f6c3a36847f818264f', 'de... 10 \n", "280 [{'credit_id': '52fe437b9251416c750120e3', 'de... 10 \n", "2251 [{'credit_id': '52fe48a1c3a368484e102d25', 'de... 8 \n", "639 [{'credit_id': '52fe44f19251416c75046447', 'de... 8 \n", "2582 [{'credit_id': '52fe44ddc3a36847f80ae983', 'de... 8 \n", "1190 [{'credit_id': '52fe47d59251416c750a6ef7', 'de... 7 \n", "2490 [{'credit_id': '52fe4775c3a36847f8136a6d', 'de... 6 \n", "1346 [{'credit_id': '52fe4228c3a36847f800869d', 'de... 5 \n", "2117 [{'credit_id': '52fe47d8c3a36847f814b4fd', 'de... 4 \n", "334 [{'credit_id': '52fe4782c3a36847f813999f', 'de... 3 \n", "1541 [{'credit_id': '53b7a7b90e0a2676b8006ab6', 'de... 3 \n", "2383 [{'credit_id': '531cd71cc3a3685c31006e82', 'de... 3 \n", "695 [{'credit_id': '52fe491ec3a36847f8189cfb', 'de... 2 \n", "1917 [{'credit_id': '57351170c3a36802410000d5', 'de... 1 \n", "1874 [{'credit_id': '52fe4838c3a36847f815cef1', 'de... 1 \n", "347 [{'credit_id': '59ce7655c3a3686aca01a522', 'de... 1 \n", "1754 [{'credit_id': '52fe4775c3a36847f8136af1', 'de... 
# Rank the films by revenue and show only the two extremes. Displaying the
# whole sorted frame stores a very wide, very long text table in the notebook
# while the rows of interest are the biggest and the smallest earners.
ranked_by_revenue = train.sort_values('revenue', ascending=False)
pd.concat([ranked_by_revenue.head(10), ranked_by_revenue.tail(10)])

# Column names available in the training frame.
train.columns
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "plt.figure(figsize=(8,6))\n", "plt.scatter((train['budget']), (train['revenue']))\n", "plt.title('Revenue vs Budget')\n", "plt.xlabel('Budget')\n", "plt.ylabel('Revenue')\n", "plt.show()" ] }, { "cell_type": "code", "execution_count": 356, "metadata": { "scrolled": false }, "outputs": [ { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "plt.figure(figsize=(8,6))\n", "plt.scatter(np.log10(train['budget']), np.log10(train['revenue']))\n", "plt.title('Revenue vs Budget')\n", "plt.xlabel('Budget [log10]')\n", "plt.ylabel('Revenue [log10]')\n", "plt.show()" ] }, { "cell_type": "code", "execution_count": 357, "metadata": { "scrolled": false }, "outputs": [ { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "plt.figure(figsize=(8,6))\n", "plt.scatter(np.log10(train['popularity']), np.log10(train['revenue']))\n", "plt.title('Revenue vs popularity')\n", "plt.xlabel('Popularity [log]')\n", "plt.ylabel('Revenue [log]')\n", "plt.show()" ] }, { "cell_type": "code", "execution_count": 358, "metadata": {}, "outputs": [ { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "train.sort_values('budget', ascending=False).head(10).plot(x='original_title', y='budget', kind='barh')\n", "plt.xlabel('Budget [USD]');" ] }, { "cell_type": "code", "execution_count": 359, "metadata": {}, "outputs": [ { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "train.sort_values('revenue', ascending=False).head(10).plot(x='original_title', \n", " y='revenue', kind='barh')\n", "plt.xlabel('Revenue [USD]');" ] }, { "cell_type": "code", "execution_count": 360, "metadata": { "scrolled": true }, "outputs": [ { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "train.assign(profit = lambda df: df['revenue'] - df['budget'] ).sort_values('profit', \n", " ascending=False).head(10).plot(x='original_title', \n", " y='profit', kind='barh')\n", "plt.xlabel('Profit [USD]');" ] }, { "cell_type": "code", "execution_count": 361, "metadata": {}, "outputs": [], "source": [ "# Function to parse the first 'name' value from this structure of a list of dictionaries\n", "def parse_json(x):\n", " try:\n", " return json.loads(x.replace(\"'\", '\"'))[0]['name']\n", " except:\n", " return ''" ] }, { "cell_type": "code", "execution_count": 362, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0 [{'id': 35, 'name': 'Comedy'}]\n", "1 [{'id': 35, 'name': 'Comedy'}, {'id': 18, 'nam...\n", "2 [{'id': 18, 'name': 'Drama'}]\n", "3 [{'id': 53, 'name': 'Thriller'}, {'id': 18, 'n...\n", "4 [{'id': 28, 'name': 'Action'}, {'id': 53, 'nam...\n", "Name: genres, dtype: object" ] }, "execution_count": 362, "metadata": {}, "output_type": "execute_result" } ], "source": [ "train['genres'].head()" ] }, { "cell_type": "code", "execution_count": 363, "metadata": {}, "outputs": [], "source": [ "train['genres'] = train['genres'].apply(parse_json)" ] }, { "cell_type": "code", "execution_count": 364, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0 Comedy\n", "1 Comedy\n", "2 Drama\n", "3 Thriller\n", "4 Action\n", "Name: genres, dtype: object" ] }, "execution_count": 364, "metadata": {}, "output_type": "execute_result" } ], "source": [ "train['genres'].head()" ] }, { "cell_type": "code", "execution_count": 365, "metadata": { "scrolled": true }, "outputs": [ { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "train.groupby('genres')['revenue'].mean().sort_values().plot(kind='barh')\n", "plt.xlabel('Revenue [USD]');" ] }, { "cell_type": "code", "execution_count": 366, "metadata": {}, "outputs": [], "source": [ "train['collection'] = ~train['belongs_to_collection'].isna()" ] }, { "cell_type": "code", "execution_count": 367, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "False 2396\n", "True 604\n", "Name: collection, dtype: int64" ] }, "execution_count": 367, "metadata": {}, "output_type": "execute_result" } ], "source": [ "train['collection'].value_counts()" ] }, { "cell_type": "code", "execution_count": 368, "metadata": {}, "outputs": [ { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "fig, ax= plt.subplots(figsize=(8,6))\n", "ax.set_yscale('symlog')\n", "sns.boxplot(x= 'collection', y='revenue', data=train, ax=ax);" ] }, { "cell_type": "code", "execution_count": 369, "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/plain": [ "0 [{'name': 'Paramount Pictures', 'id': 4}, {'na...\n", "1 [{'name': 'Walt Disney Pictures', 'id': 2}]\n", "2 [{'name': 'Bold Films', 'id': 2266}, {'name': ...\n", "3 NaN\n", "4 NaN\n", "5 NaN\n", "6 [{'name': 'Ghost House Pictures', 'id': 768}, ...\n", "7 NaN\n", "8 [{'name': 'Walt Disney Pictures', 'id': 2}, {'...\n", "9 [{'name': 'Castle Rock Entertainment', 'id': 97}]\n", "Name: production_companies, dtype: object" ] }, "execution_count": 369, "metadata": {}, "output_type": "execute_result" } ], "source": [ "train['production_companies'].head(10)" ] }, { "cell_type": "code", "execution_count": 370, "metadata": {}, "outputs": [], "source": [ "train['production_companies'] = train['production_companies'].apply(parse_json)" ] }, { "cell_type": "code", "execution_count": 371, "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/plain": [ "0 Paramount Pictures\n", "1 Walt Disney Pictures\n", "2 Bold Films\n", "3 \n", "4 \n", "5 \n", "6 Ghost House Pictures\n", "7 \n", "8 Walt Disney Pictures\n", "9 Castle Rock Entertainment\n", "Name: production_companies, dtype: object" ] }, "execution_count": 371, "metadata": {}, "output_type": "execute_result" } ], "source": [ "train['production_companies'].head(10)" ] }, { "cell_type": "code", "execution_count": 372, "metadata": {}, "outputs": [ { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "train.groupby('production_companies')['revenue'].mean().sort_values(ascending=False).head(20).plot(kind='barh')\n", "plt.xlabel('Revenue [USD]');" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Data Preperation" ] }, { "cell_type": "code", "execution_count": 373, "metadata": {}, "outputs": [], "source": [ "# using SentimentIntensityAnalyzer function from the vaderSentiment package\n", "# for an analysis of the sentiment of the films 'overview' and 'tagline'\n", "analyser = SentimentIntensityAnalyzer()" ] }, { "cell_type": "code", "execution_count": 374, "metadata": {}, "outputs": [], "source": [ "# Fill out the NaNs values in 'overview' and 'tagline'\n", "# with an empty string ('') before processing the analyser scores\n", "train['overview'] = train['overview'].fillna('')\n", "train['tagline'] = train['tagline'].fillna('')" ] }, { "cell_type": "code", "execution_count": 375, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'neg': 0.069, 'neu': 0.747, 'pos': 0.184, 'compound': 0.6124}" ] }, "execution_count": 375, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Sample of vaderSentiment usage\n", "# The Sum of neg, neu, pos is always one\n", "# 'copound' is a composition score of the other values \n", "analyser.polarity_scores(train['overview'].iloc[0])" ] }, { "cell_type": "code", "execution_count": 376, "metadata": {}, "outputs": [], "source": [ "# using only the 'compound' output of vaderSentiment on the 'overview' column\n", "# 'compound' gathers the general picture related to sentiment analysis (Composition score)\n", "# x.lower() will convert all the letters into lower case\n", "train['sentiment'] = train['overview'].apply(lambda x: analyser.polarity_scores(x.lower())['compound'])" ] }, { "cell_type": "code", "execution_count": 377, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 
377, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "train.groupby(pd.cut(train['sentiment'], 6))['revenue'].mean().plot(kind='barh')" ] }, { "cell_type": "code", "execution_count": 378, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 378, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "# using only the 'compound' output of vaderSentiment on the 'tagline' column\n", "# 'compound' gathers the general picture related to sentiment analysis (Composition score)\n", "train['tag_sentiment'] = train['tagline'].apply(lambda x: analyser.polarity_scores(x.lower())['compound'])\n", "train.groupby(pd.cut(train['tag_sentiment'], 6))['revenue'].mean().plot(kind='barh')" ] }, { "cell_type": "code", "execution_count": 379, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "tag_sentiment 0.014762\n", "sentiment -0.020765\n", "dtype: float64" ] }, "execution_count": 379, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# As we can see from the sentiment analysis, there is (almost) no correlation between\n", "# the 'compound' value generated by vaderSentiment package (a composition sentiment value)\n", "# To the 'overview' and 'tagline' columns,\n", "# hence, we will drop the 'overview', 'tagline' columns in the data-preparation step (the next step)\n", "train[['tag_sentiment', 'sentiment']].corrwith(train['revenue'])" ] }, { "cell_type": "code", "execution_count": 380, "metadata": {}, "outputs": [], "source": [ "import ast" ] }, { "cell_type": "code", "execution_count": 381, "metadata": {}, "outputs": [], "source": [ "# Helper function to parse text and convert given strings to lists \n", "def text_to_list(x):\n", " if pd.isna(x):\n", " return ''\n", " else:\n", " return ast.literal_eval(x)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Sample of usages with text_to_list" ] }, { "cell_type": "code", "execution_count": 382, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[3, 4, 5, 6]" ] }, "execution_count": 382, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# '[3,4,5,6]' is a string, it is not a list, using the function will convert it to a list\n", "ast.literal_eval('[3,4,5,6]')" ] }, { 
"cell_type": "code", "execution_count": 383, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[{'id': 35, 'name': 'Comedy'}]" ] }, "execution_count": 383, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Another example of usage\n", "ast.literal_eval(\"[{'id': 35, 'name': 'Comedy'}]\")" ] }, { "cell_type": "code", "execution_count": 384, "metadata": {}, "outputs": [], "source": [ "# Load the Train and Test sets and keep the size of the sets in two variables (ntrain, ntest)\n", "train = pd.read_csv('train.csv')\n", "ntrain = train.shape[0]\n", "test = pd.read_csv('test.csv')\n", "ntest = test.shape[0]" ] }, { "cell_type": "code", "execution_count": 385, "metadata": {}, "outputs": [], "source": [ "# Combine the train and the test sets, all the data preparations\n", "# will be done on the combined set.\n", "# The combined set will be split again at the end of the preparation step\n", "combined = pd.concat((train, test), sort=False)" ] }, { "cell_type": "code", "execution_count": 386, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
idbelongs_to_collectionbudgetgenreshomepageimdb_idoriginal_languageoriginal_titleoverviewpopularity...release_dateruntimespoken_languagesstatustaglinetitleKeywordscastcrewrevenue
01[{'id': 313576, 'name': 'Hot Tub Time Machine ...14000000[{'id': 35, 'name': 'Comedy'}]NaNtt2637294enHot Tub Time Machine 2When Lou, who has become the \"father of the In...6.575393...2/20/1593.0[{'iso_639_1': 'en', 'name': 'English'}]ReleasedThe Laws of Space and Time are About to be Vio...Hot Tub Time Machine 2[{'id': 4379, 'name': 'time travel'}, {'id': 9...[{'cast_id': 4, 'character': 'Lou', 'credit_id...[{'credit_id': '59ac067c92514107af02c8c8', 'de...12314651.0
12[{'id': 107674, 'name': 'The Princess Diaries ...40000000[{'id': 35, 'name': 'Comedy'}, {'id': 18, 'nam...NaNtt0368933enThe Princess Diaries 2: Royal EngagementMia Thermopolis is now a college graduate and ...8.248895...8/6/04113.0[{'iso_639_1': 'en', 'name': 'English'}]ReleasedIt can take a lifetime to find true love; she'...The Princess Diaries 2: Royal Engagement[{'id': 2505, 'name': 'coronation'}, {'id': 42...[{'cast_id': 1, 'character': 'Mia Thermopolis'...[{'credit_id': '52fe43fe9251416c7502563d', 'de...95149435.0
23NaN3300000[{'id': 18, 'name': 'Drama'}]http://sonyclassics.com/whiplash/tt2582802enWhiplashUnder the direction of a ruthless instructor, ...64.299990...10/10/14105.0[{'iso_639_1': 'en', 'name': 'English'}]ReleasedThe road to greatness can take you to the edge.Whiplash[{'id': 1416, 'name': 'jazz'}, {'id': 1523, 'n...[{'cast_id': 5, 'character': 'Andrew Neimann',...[{'credit_id': '54d5356ec3a3683ba0000039', 'de...13092000.0
34NaN1200000[{'id': 53, 'name': 'Thriller'}, {'id': 18, 'n...http://kahaanithefilm.com/tt1821480hiKahaaniVidya Bagchi (Vidya Balan) arrives in Kolkata ...3.174936...3/9/12122.0[{'iso_639_1': 'en', 'name': 'English'}, {'iso...ReleasedNaNKahaani[{'id': 10092, 'name': 'mystery'}, {'id': 1054...[{'cast_id': 1, 'character': 'Vidya Bagchi', '...[{'credit_id': '52fe48779251416c9108d6eb', 'de...16000000.0
45NaN0[{'id': 28, 'name': 'Action'}, {'id': 53, 'nam...NaNtt1380152ko마린보이Marine Boy is the story of a former national s...1.148070...2/5/09118.0[{'iso_639_1': 'ko', 'name': '한국어/조선말'}]ReleasedNaNMarine BoyNaN[{'cast_id': 3, 'character': 'Chun-soo', 'cred...[{'credit_id': '52fe464b9251416c75073b43', 'de...3923970.0
\n", "

5 rows × 23 columns

\n", "
" ], "text/plain": [ " id belongs_to_collection budget \\\n", "0 1 [{'id': 313576, 'name': 'Hot Tub Time Machine ... 14000000 \n", "1 2 [{'id': 107674, 'name': 'The Princess Diaries ... 40000000 \n", "2 3 NaN 3300000 \n", "3 4 NaN 1200000 \n", "4 5 NaN 0 \n", "\n", " genres \\\n", "0 [{'id': 35, 'name': 'Comedy'}] \n", "1 [{'id': 35, 'name': 'Comedy'}, {'id': 18, 'nam... \n", "2 [{'id': 18, 'name': 'Drama'}] \n", "3 [{'id': 53, 'name': 'Thriller'}, {'id': 18, 'n... \n", "4 [{'id': 28, 'name': 'Action'}, {'id': 53, 'nam... \n", "\n", " homepage imdb_id original_language \\\n", "0 NaN tt2637294 en \n", "1 NaN tt0368933 en \n", "2 http://sonyclassics.com/whiplash/ tt2582802 en \n", "3 http://kahaanithefilm.com/ tt1821480 hi \n", "4 NaN tt1380152 ko \n", "\n", " original_title \\\n", "0 Hot Tub Time Machine 2 \n", "1 The Princess Diaries 2: Royal Engagement \n", "2 Whiplash \n", "3 Kahaani \n", "4 마린보이 \n", "\n", " overview popularity ... \\\n", "0 When Lou, who has become the \"father of the In... 6.575393 ... \n", "1 Mia Thermopolis is now a college graduate and ... 8.248895 ... \n", "2 Under the direction of a ruthless instructor, ... 64.299990 ... \n", "3 Vidya Bagchi (Vidya Balan) arrives in Kolkata ... 3.174936 ... \n", "4 Marine Boy is the story of a former national s... 1.148070 ... \n", "\n", " release_date runtime spoken_languages \\\n", "0 2/20/15 93.0 [{'iso_639_1': 'en', 'name': 'English'}] \n", "1 8/6/04 113.0 [{'iso_639_1': 'en', 'name': 'English'}] \n", "2 10/10/14 105.0 [{'iso_639_1': 'en', 'name': 'English'}] \n", "3 3/9/12 122.0 [{'iso_639_1': 'en', 'name': 'English'}, {'iso... \n", "4 2/5/09 118.0 [{'iso_639_1': 'ko', 'name': '한국어/조선말'}] \n", "\n", " status tagline \\\n", "0 Released The Laws of Space and Time are About to be Vio... \n", "1 Released It can take a lifetime to find true love; she'... \n", "2 Released The road to greatness can take you to the edge. 
\n", "3 Released NaN \n", "4 Released NaN \n", "\n", " title \\\n", "0 Hot Tub Time Machine 2 \n", "1 The Princess Diaries 2: Royal Engagement \n", "2 Whiplash \n", "3 Kahaani \n", "4 Marine Boy \n", "\n", " Keywords \\\n", "0 [{'id': 4379, 'name': 'time travel'}, {'id': 9... \n", "1 [{'id': 2505, 'name': 'coronation'}, {'id': 42... \n", "2 [{'id': 1416, 'name': 'jazz'}, {'id': 1523, 'n... \n", "3 [{'id': 10092, 'name': 'mystery'}, {'id': 1054... \n", "4 NaN \n", "\n", " cast \\\n", "0 [{'cast_id': 4, 'character': 'Lou', 'credit_id... \n", "1 [{'cast_id': 1, 'character': 'Mia Thermopolis'... \n", "2 [{'cast_id': 5, 'character': 'Andrew Neimann',... \n", "3 [{'cast_id': 1, 'character': 'Vidya Bagchi', '... \n", "4 [{'cast_id': 3, 'character': 'Chun-soo', 'cred... \n", "\n", " crew revenue \n", "0 [{'credit_id': '59ac067c92514107af02c8c8', 'de... 12314651.0 \n", "1 [{'credit_id': '52fe43fe9251416c7502563d', 'de... 95149435.0 \n", "2 [{'credit_id': '54d5356ec3a3683ba0000039', 'de... 13092000.0 \n", "3 [{'credit_id': '52fe48779251416c9108d6eb', 'de... 16000000.0 \n", "4 [{'credit_id': '52fe464b9251416c75073b43', 'de... 3923970.0 \n", "\n", "[5 rows x 23 columns]" ] }, "execution_count": 386, "metadata": {}, "output_type": "execute_result" } ], "source": [ "combined.head()" ] }, { "cell_type": "code", "execution_count": 387, "metadata": {}, "outputs": [], "source": [ "# Drop all of the not-relevant columns from the combined dataset\n", "# Columns that will not contribute to predicting the revenue\n", "combined.drop(columns=['id','imdb_id', 'poster_path', 'title', 'original_title'], inplace=True)" ] }, { "cell_type": "code", "execution_count": 388, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
belongs_to_collectionbudgetgenreshomepageoriginal_languageoverviewpopularityproduction_companiesproduction_countriesrelease_dateruntimespoken_languagesstatustaglineKeywordscastcrewrevenue
0[{'id': 313576, 'name': 'Hot Tub Time Machine ...14000000[{'id': 35, 'name': 'Comedy'}]NaNenWhen Lou, who has become the \"father of the In...6.575393[{'name': 'Paramount Pictures', 'id': 4}, {'na...[{'iso_3166_1': 'US', 'name': 'United States o...2/20/1593.0[{'iso_639_1': 'en', 'name': 'English'}]ReleasedThe Laws of Space and Time are About to be Vio...[{'id': 4379, 'name': 'time travel'}, {'id': 9...[{'cast_id': 4, 'character': 'Lou', 'credit_id...[{'credit_id': '59ac067c92514107af02c8c8', 'de...12314651.0
1[{'id': 107674, 'name': 'The Princess Diaries ...40000000[{'id': 35, 'name': 'Comedy'}, {'id': 18, 'nam...NaNenMia Thermopolis is now a college graduate and ...8.248895[{'name': 'Walt Disney Pictures', 'id': 2}][{'iso_3166_1': 'US', 'name': 'United States o...8/6/04113.0[{'iso_639_1': 'en', 'name': 'English'}]ReleasedIt can take a lifetime to find true love; she'...[{'id': 2505, 'name': 'coronation'}, {'id': 42...[{'cast_id': 1, 'character': 'Mia Thermopolis'...[{'credit_id': '52fe43fe9251416c7502563d', 'de...95149435.0
2NaN3300000[{'id': 18, 'name': 'Drama'}]http://sonyclassics.com/whiplash/enUnder the direction of a ruthless instructor, ...64.299990[{'name': 'Bold Films', 'id': 2266}, {'name': ...[{'iso_3166_1': 'US', 'name': 'United States o...10/10/14105.0[{'iso_639_1': 'en', 'name': 'English'}]ReleasedThe road to greatness can take you to the edge.[{'id': 1416, 'name': 'jazz'}, {'id': 1523, 'n...[{'cast_id': 5, 'character': 'Andrew Neimann',...[{'credit_id': '54d5356ec3a3683ba0000039', 'de...13092000.0
3NaN1200000[{'id': 53, 'name': 'Thriller'}, {'id': 18, 'n...http://kahaanithefilm.com/hiVidya Bagchi (Vidya Balan) arrives in Kolkata ...3.174936NaN[{'iso_3166_1': 'IN', 'name': 'India'}]3/9/12122.0[{'iso_639_1': 'en', 'name': 'English'}, {'iso...ReleasedNaN[{'id': 10092, 'name': 'mystery'}, {'id': 1054...[{'cast_id': 1, 'character': 'Vidya Bagchi', '...[{'credit_id': '52fe48779251416c9108d6eb', 'de...16000000.0
4NaN0[{'id': 28, 'name': 'Action'}, {'id': 53, 'nam...NaNkoMarine Boy is the story of a former national s...1.148070NaN[{'iso_3166_1': 'KR', 'name': 'South Korea'}]2/5/09118.0[{'iso_639_1': 'ko', 'name': '한국어/조선말'}]ReleasedNaNNaN[{'cast_id': 3, 'character': 'Chun-soo', 'cred...[{'credit_id': '52fe464b9251416c75073b43', 'de...3923970.0
\n", "
" ], "text/plain": [ " belongs_to_collection budget \\\n", "0 [{'id': 313576, 'name': 'Hot Tub Time Machine ... 14000000 \n", "1 [{'id': 107674, 'name': 'The Princess Diaries ... 40000000 \n", "2 NaN 3300000 \n", "3 NaN 1200000 \n", "4 NaN 0 \n", "\n", " genres \\\n", "0 [{'id': 35, 'name': 'Comedy'}] \n", "1 [{'id': 35, 'name': 'Comedy'}, {'id': 18, 'nam... \n", "2 [{'id': 18, 'name': 'Drama'}] \n", "3 [{'id': 53, 'name': 'Thriller'}, {'id': 18, 'n... \n", "4 [{'id': 28, 'name': 'Action'}, {'id': 53, 'nam... \n", "\n", " homepage original_language \\\n", "0 NaN en \n", "1 NaN en \n", "2 http://sonyclassics.com/whiplash/ en \n", "3 http://kahaanithefilm.com/ hi \n", "4 NaN ko \n", "\n", " overview popularity \\\n", "0 When Lou, who has become the \"father of the In... 6.575393 \n", "1 Mia Thermopolis is now a college graduate and ... 8.248895 \n", "2 Under the direction of a ruthless instructor, ... 64.299990 \n", "3 Vidya Bagchi (Vidya Balan) arrives in Kolkata ... 3.174936 \n", "4 Marine Boy is the story of a former national s... 1.148070 \n", "\n", " production_companies \\\n", "0 [{'name': 'Paramount Pictures', 'id': 4}, {'na... \n", "1 [{'name': 'Walt Disney Pictures', 'id': 2}] \n", "2 [{'name': 'Bold Films', 'id': 2266}, {'name': ... \n", "3 NaN \n", "4 NaN \n", "\n", " production_countries release_date runtime \\\n", "0 [{'iso_3166_1': 'US', 'name': 'United States o... 2/20/15 93.0 \n", "1 [{'iso_3166_1': 'US', 'name': 'United States o... 8/6/04 113.0 \n", "2 [{'iso_3166_1': 'US', 'name': 'United States o... 10/10/14 105.0 \n", "3 [{'iso_3166_1': 'IN', 'name': 'India'}] 3/9/12 122.0 \n", "4 [{'iso_3166_1': 'KR', 'name': 'South Korea'}] 2/5/09 118.0 \n", "\n", " spoken_languages status \\\n", "0 [{'iso_639_1': 'en', 'name': 'English'}] Released \n", "1 [{'iso_639_1': 'en', 'name': 'English'}] Released \n", "2 [{'iso_639_1': 'en', 'name': 'English'}] Released \n", "3 [{'iso_639_1': 'en', 'name': 'English'}, {'iso... 
Released \n", "4 [{'iso_639_1': 'ko', 'name': '한국어/조선말'}] Released \n", "\n", " tagline \\\n", "0 The Laws of Space and Time are About to be Vio... \n", "1 It can take a lifetime to find true love; she'... \n", "2 The road to greatness can take you to the edge. \n", "3 NaN \n", "4 NaN \n", "\n", " Keywords \\\n", "0 [{'id': 4379, 'name': 'time travel'}, {'id': 9... \n", "1 [{'id': 2505, 'name': 'coronation'}, {'id': 42... \n", "2 [{'id': 1416, 'name': 'jazz'}, {'id': 1523, 'n... \n", "3 [{'id': 10092, 'name': 'mystery'}, {'id': 1054... \n", "4 NaN \n", "\n", " cast \\\n", "0 [{'cast_id': 4, 'character': 'Lou', 'credit_id... \n", "1 [{'cast_id': 1, 'character': 'Mia Thermopolis'... \n", "2 [{'cast_id': 5, 'character': 'Andrew Neimann',... \n", "3 [{'cast_id': 1, 'character': 'Vidya Bagchi', '... \n", "4 [{'cast_id': 3, 'character': 'Chun-soo', 'cred... \n", "\n", " crew revenue \n", "0 [{'credit_id': '59ac067c92514107af02c8c8', 'de... 12314651.0 \n", "1 [{'credit_id': '52fe43fe9251416c7502563d', 'de... 95149435.0 \n", "2 [{'credit_id': '54d5356ec3a3683ba0000039', 'de... 13092000.0 \n", "3 [{'credit_id': '52fe48779251416c9108d6eb', 'de... 16000000.0 \n", "4 [{'credit_id': '52fe464b9251416c75073b43', 'de... 
combined.head()

# Preparation for the parsing step: apply 'text_to_list' to every column that
# stores a stringified list of dictionaries.
for col in ['genres', 'production_companies', 'production_countries',
            'spoken_languages', 'Keywords', 'cast', 'crew']:
    combined[col] = combined[col].apply(text_to_list)

combined['belongs_to_collection'].head()

# Convert the 'belongs_to_collection' column to a zero/one column:
#   1 -> the cell holds a value (the movie belongs to a collection)
#   0 -> the cell is NaN (the movie does not belong to a collection)
combined['belongs_to_collection'] = 1*(~combined['belongs_to_collection'].isna())

combined['belongs_to_collection'].head()

# Reminder: the sentiment analysis revealed (almost) no correlation between
# the 'overview' / 'tagline' columns and the 'revenue' column (our predicted column).
# Hence we only keep a binary label for each movie's 'tagline' (and 'homepage'):
# has / has not a value. The 'overview' text is reduced to its character count.
combined['tagline'] = 1*(~combined['tagline'].isna())

combined['tagline'].head()

combined['homepage'] = 1*(~combined['homepage'].isna())

combined['homepage'].head()

# New feature: the number of characters in each movie's overview.
# Movies without an overview (NaN) are set to zero.
# The result is assigned back instead of calling fillna(..., inplace=True) on
# the column selection: under pandas Copy-on-Write that chained in-place call
# does not update `combined`, leaving NaN in place.
combined['overview'] = combined['overview'].str.len().fillna(0)

combined['overview'].head()
"combined['genre_number'] = combined['genres'].apply(lambda x: len(x))" ] }, { "cell_type": "code", "execution_count": 400, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0 1\n", "1 4\n", "2 1\n", "3 2\n", "4 2\n", "Name: genre_number, dtype: int64" ] }, "execution_count": 400, "metadata": {}, "output_type": "execute_result" } ], "source": [ "combined['genre_number'].head()" ] }, { "cell_type": "code", "execution_count": 401, "metadata": { "scrolled": false }, "outputs": [ { "data": { "text/plain": [ "2 2379\n", "3 2208\n", "1 1488\n", "4 967\n", "5 280\n", "6 48\n", "0 23\n", "7 4\n", "8 1\n", "Name: genre_number, dtype: int64" ] }, "execution_count": 401, "metadata": {}, "output_type": "execute_result" } ], "source": [ "combined['genre_number'].value_counts()" ] }, { "cell_type": "code", "execution_count": 402, "metadata": {}, "outputs": [], "source": [ "# function to parse the genre of a movie.\n", "# few movies do not have a genre value, some have more than one genre value\n", "# the function will parse only the first 3 genres (if exist)\n", "# and create 3 new columns named: 'genres1', 'genres2', 'genres3'\n", "# in the combined dataset.\n", "def parse_genre(x):\n", " if type(x) == str:\n", " return pd.Series(['','',''], index=['genres1', 'genres2', 'genres3'] )\n", " if len(x) == 1:\n", " return pd.Series([x[0]['name'],'',''], index=['genres1', 'genres2', 'genres3'] )\n", " if len(x) == 2:\n", " return pd.Series([x[0]['name'],x[1]['name'],''], index=['genres1', 'genres2', 'genres3'] )\n", " if len(x) > 2:\n", " return pd.Series([x[0]['name'],x[1]['name'],x[2]['name']], index=['genres1', 'genres2', 'genres3'] )" ] }, { "cell_type": "code", "execution_count": 403, "metadata": {}, "outputs": [], "source": [ "# Apply the function to create 3 new columns \n", "# and drop the original 'genres' column\n", "combined[['genres1', 'genres2', 'genres3']] = \\\n", " combined['genres'].apply(parse_genre)\n", "combined.drop(columns='genres', inplace=True)" ] }, { 
"cell_type": "code", "execution_count": 404, "metadata": { "scrolled": false }, "outputs": [ { "data": { "text/plain": [ "0 Comedy\n", "1 Comedy\n", "2 Drama\n", "3 Thriller\n", "4 Action\n", "Name: genres1, dtype: object" ] }, "execution_count": 404, "metadata": {}, "output_type": "execute_result" } ], "source": [ "combined['genres1'].head()" ] }, { "cell_type": "code", "execution_count": 405, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0 [{'name': 'Paramount Pictures', 'id': 4}, {'na...\n", "1 [{'name': 'Walt Disney Pictures', 'id': 2}]\n", "2 [{'name': 'Bold Films', 'id': 2266}, {'name': ...\n", "3 \n", "4 \n", "Name: production_companies, dtype: object" ] }, "execution_count": 405, "metadata": {}, "output_type": "execute_result" } ], "source": [ "combined['production_companies'].head()" ] }, { "cell_type": "code", "execution_count": 406, "metadata": {}, "outputs": [], "source": [ "# Create a new column with the number of production companies \n", "# related to each movie\n", "combined['production_company_number'] = \\\n", " combined['production_companies'].apply(lambda x: len(x))" ] }, { "cell_type": "code", "execution_count": 407, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0 3\n", "1 1\n", "2 3\n", "3 0\n", "4 0\n", "Name: production_company_number, dtype: int64" ] }, "execution_count": 407, "metadata": {}, "output_type": "execute_result" } ], "source": [ "combined['production_company_number'].head()" ] }, { "cell_type": "code", "execution_count": 408, "metadata": {}, "outputs": [], "source": [ "# function to parse the production companies of a movie.\n", "# few movies do not have a production companies value, some have more than one value\n", "# the function will parse only the first 3 production companies (if exist)\n", "# and create 3 new columns named: 'prod1', 'prod2', 'prod3'\n", "# in the combined dataset\n", "def parse_production_companies(x):\n", " if type(x) == str:\n", " return pd.Series(['','',''], index=['prod1', 
'prod2', 'prod3'] )\n", " if len(x) == 1:\n", " return pd.Series([x[0]['name'],'',''], index=['prod1', 'prod2', 'prod3'] )\n", " if len(x) == 2:\n", " return pd.Series([x[0]['name'],x[1]['name'],''], index=['prod1', 'prod2', 'prod3'] )\n", " if len(x) > 2:\n", " return pd.Series([x[0]['name'],x[1]['name'],x[2]['name']], index=['prod1', 'prod2', 'prod3'] )" ] }, { "cell_type": "code", "execution_count": 409, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0 [{'name': 'Paramount Pictures', 'id': 4}, {'na...\n", "1 [{'name': 'Walt Disney Pictures', 'id': 2}]\n", "2 [{'name': 'Bold Films', 'id': 2266}, {'name': ...\n", "3 \n", "4 \n", "5 \n", "6 [{'name': 'Ghost House Pictures', 'id': 768}, ...\n", "7 \n", "8 [{'name': 'Walt Disney Pictures', 'id': 2}, {'...\n", "9 [{'name': 'Castle Rock Entertainment', 'id': 97}]\n", "Name: production_companies, dtype: object" ] }, "execution_count": 409, "metadata": {}, "output_type": "execute_result" } ], "source": [ "combined['production_companies'].head(10)" ] }, { "cell_type": "code", "execution_count": 410, "metadata": {}, "outputs": [], "source": [ "# Apply the function to create 3 new columns \n", "# and drop the original 'production companies' column\n", "combined[['prod1', 'prod2', 'prod3']] = \\\n", " combined['production_companies'].apply(parse_production_companies)\n", "combined.drop(columns='production_companies', inplace=True)" ] }, { "cell_type": "code", "execution_count": 411, "metadata": { "scrolled": false }, "outputs": [ { "data": { "text/plain": [ "0 Paramount Pictures\n", "1 Walt Disney Pictures\n", "2 Bold Films\n", "3 \n", "4 \n", "5 \n", "6 Ghost House Pictures\n", "7 \n", "8 Walt Disney Pictures\n", "9 Castle Rock Entertainment\n", "Name: prod1, dtype: object" ] }, "execution_count": 411, "metadata": {}, "output_type": "execute_result" } ], "source": [ "combined['prod1'].head(10)" ] }, { "cell_type": "code", "execution_count": 412, "metadata": {}, "outputs": [], "source": [ "# Create a new column 
# function to parse the production countries of a movie.
# few movies do not have a production countries value, some have more than one value
# the function will parse only the first 3 production countries (if exist)
# and create 3 new columns named: 'country1', 'country2', 'country3'
# in the combined dataset
def parse_production_countries(x):
    """Return the first three production-country names as a labelled Series.

    Parameters
    ----------
    x : list of dict, or str
        Parsed country entries, each a dict with a ``'name'`` key. A string
        (the ``''`` placeholder used for a missing value) means "no countries".

    Returns
    -------
    pandas.Series
        Three values indexed ``'country1'``, ``'country2'``, ``'country3'``;
        positions without a country hold ``''``.
    """
    labels = ['country1', 'country2', 'country3']
    # Strings mark a missing value; otherwise keep at most the first three names.
    names = [] if isinstance(x, str) else [entry['name'] for entry in x[:len(labels)]]
    # Pad to exactly three values. This also covers an empty list, for which
    # an if-chain over len(x) == 1 / == 2 / > 2 would fall through to None.
    names += [''] * (len(labels) - len(names))
    return pd.Series(names, index=labels)
"outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
belongs_to_collectionbudgethomepageoriginal_languageoverviewpopularityrelease_dateruntimespoken_languagesstatus...genres2genres3production_company_numberprod1prod2prod3production_country_numbercountry1country2country3
01140000000en155.06.5753932/20/1593.0[{'iso_639_1': 'en', 'name': 'English'}]Released...3Paramount PicturesUnited ArtistsMetro-Goldwyn-Mayer (MGM)1United States of America
11400000000en393.08.2488958/6/04113.0[{'iso_639_1': 'en', 'name': 'English'}]Released...DramaFamily1Walt Disney Pictures1United States of America
2033000001en130.064.29999010/10/14105.0[{'iso_639_1': 'en', 'name': 'English'}]Released...3Bold FilmsBlumhouse ProductionsRight of Way Films1United States of America
3012000001hi581.03.1749363/9/12122.0[{'iso_639_1': 'en', 'name': 'English'}, {'iso...Released...Drama01India
4000ko168.01.1480702/5/09118.0[{'iso_639_1': 'ko', 'name': '한국어/조선말'}]Released...Thriller01South Korea
\n", "

5 rows × 27 columns

\n", "
" ], "text/plain": [ " belongs_to_collection budget homepage original_language overview \\\n", "0 1 14000000 0 en 155.0 \n", "1 1 40000000 0 en 393.0 \n", "2 0 3300000 1 en 130.0 \n", "3 0 1200000 1 hi 581.0 \n", "4 0 0 0 ko 168.0 \n", "\n", " popularity release_date runtime \\\n", "0 6.575393 2/20/15 93.0 \n", "1 8.248895 8/6/04 113.0 \n", "2 64.299990 10/10/14 105.0 \n", "3 3.174936 3/9/12 122.0 \n", "4 1.148070 2/5/09 118.0 \n", "\n", " spoken_languages status ... \\\n", "0 [{'iso_639_1': 'en', 'name': 'English'}] Released ... \n", "1 [{'iso_639_1': 'en', 'name': 'English'}] Released ... \n", "2 [{'iso_639_1': 'en', 'name': 'English'}] Released ... \n", "3 [{'iso_639_1': 'en', 'name': 'English'}, {'iso... Released ... \n", "4 [{'iso_639_1': 'ko', 'name': '한국어/조선말'}] Released ... \n", "\n", " genres2 genres3 production_company_number prod1 \\\n", "0 3 Paramount Pictures \n", "1 Drama Family 1 Walt Disney Pictures \n", "2 3 Bold Films \n", "3 Drama 0 \n", "4 Thriller 0 \n", "\n", " prod2 prod3 production_country_number \\\n", "0 United Artists Metro-Goldwyn-Mayer (MGM) 1 \n", "1 1 \n", "2 Blumhouse Productions Right of Way Films 1 \n", "3 1 \n", "4 1 \n", "\n", " country1 country2 country3 \n", "0 United States of America \n", "1 United States of America \n", "2 United States of America \n", "3 India \n", "4 South Korea \n", "\n", "[5 rows x 27 columns]" ] }, "execution_count": 418, "metadata": {}, "output_type": "execute_result" } ], "source": [ "combined.head()" ] }, { "cell_type": "code", "execution_count": 419, "metadata": {}, "outputs": [], "source": [
 "# Parse and break down the date column ('release_date' column).\n",
 "# The dates carry two-digit years and '%y' maps 00-68 to 2000-2068,\n",
 "# so a film released before 1969 is first parsed as a future date.\n",
 "combined['release_date'] = pd.to_datetime(combined['release_date'], format='%m/%d/%y')\n",
 "\n",
 "# Move every date that lies in the future back by one century.\n",
 "# NOTE(review): assumes the data holds no genuinely upcoming releases - confirm.\n",
 "in_future = combined['release_date'] > pd.Timestamp.now()\n",
 "combined.loc[in_future, 'release_date'] = combined.loc[in_future, 'release_date'] - pd.DateOffset(years=100)\n",
 "\n",
 "# Each date part is derived and its NaN values filled in one assignment:\n",
 "# a chained `combined[col].fillna(..., inplace=True)` acts on a temporary\n",
 "# under pandas Copy-on-Write and would leave `combined` unchanged.\n",
 "\n",
 "# Parse 'weekday'; fill NaN with the most common weekday value - 4\n",
 "combined['weekday'] = combined['release_date'].dt.weekday.fillna(4)\n",
 "\n",
 "# Parse 'month'; fill NaN with the most common month value - 9\n",
 "combined['month'] = combined['release_date'].dt.month.fillna(9)\n",
 "\n",
 "# Parse 'year'; fill NaN with the median value of the 'year' column\n",
 "combined['year'] = combined['release_date'].dt.year\n",
 "combined['year'] = combined['year'].fillna(combined['year'].median())\n",
 "\n",
 "# Parse 'day'; fill NaN with the most common day value - 1\n",
 "combined['day'] = combined['release_date'].dt.day.fillna(1)\n",
 "\n",
 "# Drop the original 'release_date' column\n",
 "combined.drop(columns=['release_date'], inplace=True)"
 ] }, { "cell_type": "code", "execution_count": 420, "metadata": { "scrolled": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
belongs_to_collectionbudgethomepageoriginal_languageoverviewpopularityruntimespoken_languagesstatustagline...prod2prod3production_country_numbercountry1country2country3weekdaymonthyearday
01140000000en155.06.57539393.0[{'iso_639_1': 'en', 'name': 'English'}]Released1...United ArtistsMetro-Goldwyn-Mayer (MGM)1United States of America4.02.02015.020.0
11400000000en393.08.248895113.0[{'iso_639_1': 'en', 'name': 'English'}]Released1...1United States of America4.08.02004.06.0
2033000001en130.064.299990105.0[{'iso_639_1': 'en', 'name': 'English'}]Released1...Blumhouse ProductionsRight of Way Films1United States of America4.010.02014.010.0
3012000001hi581.03.174936122.0[{'iso_639_1': 'en', 'name': 'English'}, {'iso...Released0...1India4.03.02012.09.0
4000ko168.01.148070118.0[{'iso_639_1': 'ko', 'name': '한국어/조선말'}]Released0...1South Korea3.02.02009.05.0
\n", "

5 rows × 30 columns

\n", "
" ], "text/plain": [ " belongs_to_collection budget homepage original_language overview \\\n", "0 1 14000000 0 en 155.0 \n", "1 1 40000000 0 en 393.0 \n", "2 0 3300000 1 en 130.0 \n", "3 0 1200000 1 hi 581.0 \n", "4 0 0 0 ko 168.0 \n", "\n", " popularity runtime spoken_languages \\\n", "0 6.575393 93.0 [{'iso_639_1': 'en', 'name': 'English'}] \n", "1 8.248895 113.0 [{'iso_639_1': 'en', 'name': 'English'}] \n", "2 64.299990 105.0 [{'iso_639_1': 'en', 'name': 'English'}] \n", "3 3.174936 122.0 [{'iso_639_1': 'en', 'name': 'English'}, {'iso... \n", "4 1.148070 118.0 [{'iso_639_1': 'ko', 'name': '한국어/조선말'}] \n", "\n", " status tagline ... prod2 prod3 \\\n", "0 Released 1 ... United Artists Metro-Goldwyn-Mayer (MGM) \n", "1 Released 1 ... \n", "2 Released 1 ... Blumhouse Productions Right of Way Films \n", "3 Released 0 ... \n", "4 Released 0 ... \n", "\n", " production_country_number country1 country2 country3 \\\n", "0 1 United States of America \n", "1 1 United States of America \n", "2 1 United States of America \n", "3 1 India \n", "4 1 South Korea \n", "\n", " weekday month year day \n", "0 4.0 2.0 2015.0 20.0 \n", "1 4.0 8.0 2004.0 6.0 \n", "2 4.0 10.0 2014.0 10.0 \n", "3 4.0 3.0 2012.0 9.0 \n", "4 3.0 2.0 2009.0 5.0 \n", "\n", "[5 rows x 30 columns]" ] }, "execution_count": 420, "metadata": {}, "output_type": "execute_result" } ], "source": [ "combined.head(5)" ] }, { "cell_type": "code", "execution_count": 421, "metadata": {}, "outputs": [], "source": [
 "# Fill the NaN values in the 'runtime' column with the column median.\n",
 "# The filled column is assigned back: a chained\n",
 "# `combined['runtime'].fillna(..., inplace=True)` acts on a temporary under\n",
 "# pandas Copy-on-Write and would leave `combined` unchanged.\n",
 "combined['runtime'] = combined['runtime'].fillna(combined['runtime'].median())"
 ] }, { "cell_type": "code", "execution_count": 422, "metadata": {}, "outputs": [], "source": [ "# Create a new column with the number of spoken languages for each movie\n", "combined['spoken_languages_number'] = \\\n", " combined['spoken_languages'].apply(lambda x: len(x))" ] }, { "cell_type": "code", "execution_count": 423, "metadata": {}, "outputs": [ { "data": { 
"text/plain": [ "0 1\n", "1 1\n", "2 1\n", "3 2\n", "4 1\n", "Name: spoken_languages_number, dtype: int64" ] }, "execution_count": 423, "metadata": {}, "output_type": "execute_result" } ], "source": [ "combined['spoken_languages_number'].head()" ] }, { "cell_type": "code", "execution_count": 424, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
belongs_to_collectionbudgethomepageoriginal_languageoverviewpopularityruntimespoken_languagesstatustagline...prod3production_country_numbercountry1country2country3weekdaymonthyeardayspoken_languages_number
01140000000en155.06.57539393.0[{'iso_639_1': 'en', 'name': 'English'}]Released1...Metro-Goldwyn-Mayer (MGM)1United States of America4.02.02015.020.01
11400000000en393.08.248895113.0[{'iso_639_1': 'en', 'name': 'English'}]Released1...1United States of America4.08.02004.06.01
2033000001en130.064.299990105.0[{'iso_639_1': 'en', 'name': 'English'}]Released1...Right of Way Films1United States of America4.010.02014.010.01
3012000001hi581.03.174936122.0[{'iso_639_1': 'en', 'name': 'English'}, {'iso...Released0...1India4.03.02012.09.02
4000ko168.01.148070118.0[{'iso_639_1': 'ko', 'name': '한국어/조선말'}]Released0...1South Korea3.02.02009.05.01
\n", "

5 rows × 31 columns

\n", "
" ], "text/plain": [ " belongs_to_collection budget homepage original_language overview \\\n", "0 1 14000000 0 en 155.0 \n", "1 1 40000000 0 en 393.0 \n", "2 0 3300000 1 en 130.0 \n", "3 0 1200000 1 hi 581.0 \n", "4 0 0 0 ko 168.0 \n", "\n", " popularity runtime spoken_languages \\\n", "0 6.575393 93.0 [{'iso_639_1': 'en', 'name': 'English'}] \n", "1 8.248895 113.0 [{'iso_639_1': 'en', 'name': 'English'}] \n", "2 64.299990 105.0 [{'iso_639_1': 'en', 'name': 'English'}] \n", "3 3.174936 122.0 [{'iso_639_1': 'en', 'name': 'English'}, {'iso... \n", "4 1.148070 118.0 [{'iso_639_1': 'ko', 'name': '한국어/조선말'}] \n", "\n", " status tagline ... prod3 \\\n", "0 Released 1 ... Metro-Goldwyn-Mayer (MGM) \n", "1 Released 1 ... \n", "2 Released 1 ... Right of Way Films \n", "3 Released 0 ... \n", "4 Released 0 ... \n", "\n", " production_country_number country1 country2 country3 \\\n", "0 1 United States of America \n", "1 1 United States of America \n", "2 1 United States of America \n", "3 1 India \n", "4 1 South Korea \n", "\n", " weekday month year day spoken_languages_number \n", "0 4.0 2.0 2015.0 20.0 1 \n", "1 4.0 8.0 2004.0 6.0 1 \n", "2 4.0 10.0 2014.0 10.0 1 \n", "3 4.0 3.0 2012.0 9.0 2 \n", "4 3.0 2.0 2009.0 5.0 1 \n", "\n", "[5 rows x 31 columns]" ] }, "execution_count": 424, "metadata": {}, "output_type": "execute_result" } ], "source": [ "combined.head()" ] }, { "cell_type": "code", "execution_count": 425, "metadata": {}, "outputs": [], "source": [
 "# Parse the spoken languages of one movie into three name columns.\n",
 "#\n",
 "# x: the movie's 'spoken_languages' value. A string is treated as\n",
 "#    'no data'; anything else is expected to be a list of dicts that each\n",
 "#    carry a 'name' key.\n",
 "# Returns a pd.Series indexed 'lang1', 'lang2', 'lang3' with the first\n",
 "# three names in list order, padded with '' when fewer exist.\n",
 "# An empty list also yields three empty names rather than falling through\n",
 "# every branch and returning None.\n",
 "def parse_spoken_languages(x):\n",
 "    labels = ['lang1', 'lang2', 'lang3']\n",
 "    # a string carries no parsable entries; otherwise keep at most three\n",
 "    names = [] if isinstance(x, str) else [entry['name'] for entry in x[:len(labels)]]\n",
 "    # pad so every movie contributes exactly one value per output column\n",
 "    names += [''] * (len(labels) - len(names))\n",
 "    return pd.Series(names, index=labels)"
 ] }, { "cell_type": "code", "execution_count": 426, "metadata": {}, "outputs": [], "source": [ "# Apply the function to create 3 new columns and drop the original 'spoken languages' column\n", "combined[['lang1', 'lang2', 'lang3']] = combined['spoken_languages'].apply(parse_spoken_languages)\n", "combined.drop(columns='spoken_languages', inplace=True)" ] }, { "cell_type": "code", "execution_count": 427, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
belongs_to_collectionbudgethomepageoriginal_languageoverviewpopularityruntimestatustaglineKeywords...country2country3weekdaymonthyeardayspoken_languages_numberlang1lang2lang3
01140000000en155.06.57539393.0Released1[{'id': 4379, 'name': 'time travel'}, {'id': 9......4.02.02015.020.01English
11400000000en393.08.248895113.0Released1[{'id': 2505, 'name': 'coronation'}, {'id': 42......4.08.02004.06.01English
2033000001en130.064.299990105.0Released1[{'id': 1416, 'name': 'jazz'}, {'id': 1523, 'n......4.010.02014.010.01English
3012000001hi581.03.174936122.0Released0[{'id': 10092, 'name': 'mystery'}, {'id': 1054......4.03.02012.09.02Englishहिन्दी
4000ko168.01.148070118.0Released0...3.02.02009.05.01한국어/조선말
\n", "

5 rows × 33 columns

\n", "
" ], "text/plain": [ " belongs_to_collection budget homepage original_language overview \\\n", "0 1 14000000 0 en 155.0 \n", "1 1 40000000 0 en 393.0 \n", "2 0 3300000 1 en 130.0 \n", "3 0 1200000 1 hi 581.0 \n", "4 0 0 0 ko 168.0 \n", "\n", " popularity runtime status tagline \\\n", "0 6.575393 93.0 Released 1 \n", "1 8.248895 113.0 Released 1 \n", "2 64.299990 105.0 Released 1 \n", "3 3.174936 122.0 Released 0 \n", "4 1.148070 118.0 Released 0 \n", "\n", " Keywords ... country2 country3 \\\n", "0 [{'id': 4379, 'name': 'time travel'}, {'id': 9... ... \n", "1 [{'id': 2505, 'name': 'coronation'}, {'id': 42... ... \n", "2 [{'id': 1416, 'name': 'jazz'}, {'id': 1523, 'n... ... \n", "3 [{'id': 10092, 'name': 'mystery'}, {'id': 1054... ... \n", "4 ... \n", "\n", " weekday month year day spoken_languages_number lang1 lang2 lang3 \n", "0 4.0 2.0 2015.0 20.0 1 English \n", "1 4.0 8.0 2004.0 6.0 1 English \n", "2 4.0 10.0 2014.0 10.0 1 English \n", "3 4.0 3.0 2012.0 9.0 2 English हिन्दी \n", "4 3.0 2.0 2009.0 5.0 1 한국어/조선말 \n", "\n", "[5 rows x 33 columns]" ] }, "execution_count": 427, "metadata": {}, "output_type": "execute_result" } ], "source": [ "combined.head()" ] }, { "cell_type": "code", "execution_count": 428, "metadata": {}, "outputs": [], "source": [
 "# Most of the 'status' column values are 'Released',\n",
 "# hence the NaN values in this column are replaced with 'Released'.\n",
 "# The filled column is assigned back: a chained inplace fillna acts on a\n",
 "# temporary under pandas Copy-on-Write and would leave `combined` unchanged.\n",
 "combined['status'] = combined['status'].fillna('Released')"
 ] }, { "cell_type": "code", "execution_count": 429, "metadata": {}, "outputs": [], "source": [ "# Create a new column with the number of Keywords for each movie\n", "combined['keywords_number'] = \\\n", " combined['Keywords'].apply(lambda x: len(x))" ] }, { "cell_type": "code", "execution_count": 430, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0 4\n", "1 4\n", "2 12\n", "3 7\n", "4 0\n", "Name: keywords_number, dtype: int64" ] }, "execution_count": 430, "metadata": {}, "output_type": "execute_result" } ], "source": [ "combined['keywords_number'].head()" ] }, { "cell_type": "code", "execution_count": 431, "metadata": {}, "outputs": [], "source": [
 "# Parse the Keywords of one movie into three name columns.\n",
 "#\n",
 "# x: the movie's 'Keywords' value. A string is treated as 'no data';\n",
 "#    anything else is expected to be a list of dicts that each carry a\n",
 "#    'name' key.\n",
 "# Returns a pd.Series indexed 'key1', 'key2', 'key3' with the first three\n",
 "# names in list order, padded with '' when fewer exist.\n",
 "# An empty list also yields three empty names rather than falling through\n",
 "# every branch and returning None.\n",
 "def parse_keywords(x):\n",
 "    labels = ['key1', 'key2', 'key3']\n",
 "    # a string carries no parsable entries; otherwise keep at most three\n",
 "    names = [] if isinstance(x, str) else [entry['name'] for entry in x[:len(labels)]]\n",
 "    # pad so every movie contributes exactly one value per output column\n",
 "    names += [''] * (len(labels) - len(names))\n",
 "    return pd.Series(names, index=labels)"
 ] }, { "cell_type": "code", "execution_count": 432, "metadata": {}, "outputs": [], "source": [ "# Apply the function to create 3 new columns and drop the original 'Keywords' column\n", "combined[['key1', 'key2', 'key3']] = \\\n", " combined['Keywords'].apply(parse_keywords)\n", "combined.drop(columns='Keywords', inplace=True)" ] }, { "cell_type": "code", "execution_count": 433, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
belongs_to_collectionbudgethomepageoriginal_languageoverviewpopularityruntimestatustaglinecast...yeardayspoken_languages_numberlang1lang2lang3keywords_numberkey1key2key3
01140000000en155.06.57539393.0Released1[{'cast_id': 4, 'character': 'Lou', 'credit_id......2015.020.01English4time travelsequelhot tub
11400000000en393.08.248895113.0Released1[{'cast_id': 1, 'character': 'Mia Thermopolis'......2004.06.01English4coronationdutymarriage
2033000001en130.064.299990105.0Released1[{'cast_id': 5, 'character': 'Andrew Neimann',......2014.010.01English12jazzobsessionconservatory
3012000001hi581.03.174936122.0Released0[{'cast_id': 1, 'character': 'Vidya Bagchi', '......2012.09.02Englishहिन्दी7mysterybollywoodpolice corruption
4000ko168.01.148070118.0Released0[{'cast_id': 3, 'character': 'Chun-soo', 'cred......2009.05.01한국어/조선말0
\n", "

5 rows × 36 columns

\n", "
" ], "text/plain": [ " belongs_to_collection budget homepage original_language overview \\\n", "0 1 14000000 0 en 155.0 \n", "1 1 40000000 0 en 393.0 \n", "2 0 3300000 1 en 130.0 \n", "3 0 1200000 1 hi 581.0 \n", "4 0 0 0 ko 168.0 \n", "\n", " popularity runtime status tagline \\\n", "0 6.575393 93.0 Released 1 \n", "1 8.248895 113.0 Released 1 \n", "2 64.299990 105.0 Released 1 \n", "3 3.174936 122.0 Released 0 \n", "4 1.148070 118.0 Released 0 \n", "\n", " cast ... \\\n", "0 [{'cast_id': 4, 'character': 'Lou', 'credit_id... ... \n", "1 [{'cast_id': 1, 'character': 'Mia Thermopolis'... ... \n", "2 [{'cast_id': 5, 'character': 'Andrew Neimann',... ... \n", "3 [{'cast_id': 1, 'character': 'Vidya Bagchi', '... ... \n", "4 [{'cast_id': 3, 'character': 'Chun-soo', 'cred... ... \n", "\n", " year day spoken_languages_number lang1 lang2 lang3 \\\n", "0 2015.0 20.0 1 English \n", "1 2004.0 6.0 1 English \n", "2 2014.0 10.0 1 English \n", "3 2012.0 9.0 2 English हिन्दी \n", "4 2009.0 5.0 1 한국어/조선말 \n", "\n", " keywords_number key1 key2 key3 \n", "0 4 time travel sequel hot tub \n", "1 4 coronation duty marriage \n", "2 12 jazz obsession conservatory \n", "3 7 mystery bollywood police corruption \n", "4 0 \n", "\n", "[5 rows x 36 columns]" ] }, "execution_count": 433, "metadata": {}, "output_type": "execute_result" } ], "source": [ "combined.head()" ] }, { "cell_type": "code", "execution_count": 434, "metadata": {}, "outputs": [], "source": [
 "# Count, per movie, the cast entries whose 'gender' field equals `code`.\n",
 "# cast_column: the 'cast' column of the dataset; code: the gender code to count.\n",
 "# Returns one count per movie.\n",
 "def _count_gender(cast_column, code):\n",
 "    return cast_column.apply(lambda members: sum(member['gender'] == code for member in members))\n",
 "\n",
 "# One count column for each of the gender codes 0, 1 and 2\n",
 "combined['gender_0_number'] = _count_gender(combined['cast'], 0)\n",
 "combined['gender_1_number'] = _count_gender(combined['cast'], 1)\n",
 "combined['gender_2_number'] = _count_gender(combined['cast'], 2)"
 ] }, { "cell_type": "code", "execution_count": 435, "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/plain": [ "0 6\n", "1 0\n", "2 
31\n", "3 4\n", "4 0\n", "Name: gender_0_number, dtype: int64" ] }, "execution_count": 435, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Sample to observe one of the new columns head\n", "combined['gender_0_number'].head()" ] }, { "cell_type": "code", "execution_count": 436, "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/plain": [ "Index(['belongs_to_collection', 'budget', 'homepage', 'original_language',\n", " 'overview', 'popularity', 'runtime', 'status', 'tagline', 'cast',\n", " 'crew', 'revenue', 'genre_number', 'genres1', 'genres2', 'genres3',\n", " 'production_company_number', 'prod1', 'prod2', 'prod3',\n", " 'production_country_number', 'country1', 'country2', 'country3',\n", " 'weekday', 'month', 'year', 'day', 'spoken_languages_number', 'lang1',\n", " 'lang2', 'lang3', 'keywords_number', 'key1', 'key2', 'key3',\n", " 'gender_0_number', 'gender_1_number', 'gender_2_number'],\n", " dtype='object')" ] }, "execution_count": 436, "metadata": {}, "output_type": "execute_result" } ], "source": [ "combined.columns" ] }, { "cell_type": "code", "execution_count": 437, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
belongs_to_collectionbudgethomepageoriginal_languageoverviewpopularityruntimestatustaglinecast...lang1lang2lang3keywords_numberkey1key2key3gender_0_numbergender_1_numbergender_2_number
01140000000en155.06.57539393.0Released1[{'cast_id': 4, 'character': 'Lou', 'credit_id......English4time travelsequelhot tub6810
11400000000en393.08.248895113.0Released1[{'cast_id': 1, 'character': 'Mia Thermopolis'......English4coronationdutymarriage01010
2033000001en130.064.299990105.0Released1[{'cast_id': 5, 'character': 'Andrew Neimann',......English12jazzobsessionconservatory31713
3012000001hi581.03.174936122.0Released0[{'cast_id': 1, 'character': 'Vidya Bagchi', '......Englishहिन्दी7mysterybollywoodpolice corruption412
4000ko168.01.148070118.0Released0[{'cast_id': 3, 'character': 'Chun-soo', 'cred......한국어/조선말0004
\n", "

5 rows × 39 columns

\n", "
" ], "text/plain": [ " belongs_to_collection budget homepage original_language overview \\\n", "0 1 14000000 0 en 155.0 \n", "1 1 40000000 0 en 393.0 \n", "2 0 3300000 1 en 130.0 \n", "3 0 1200000 1 hi 581.0 \n", "4 0 0 0 ko 168.0 \n", "\n", " popularity runtime status tagline \\\n", "0 6.575393 93.0 Released 1 \n", "1 8.248895 113.0 Released 1 \n", "2 64.299990 105.0 Released 1 \n", "3 3.174936 122.0 Released 0 \n", "4 1.148070 118.0 Released 0 \n", "\n", " cast ... lang1 \\\n", "0 [{'cast_id': 4, 'character': 'Lou', 'credit_id... ... English \n", "1 [{'cast_id': 1, 'character': 'Mia Thermopolis'... ... English \n", "2 [{'cast_id': 5, 'character': 'Andrew Neimann',... ... English \n", "3 [{'cast_id': 1, 'character': 'Vidya Bagchi', '... ... English \n", "4 [{'cast_id': 3, 'character': 'Chun-soo', 'cred... ... 한국어/조선말 \n", "\n", " lang2 lang3 keywords_number key1 key2 key3 \\\n", "0 4 time travel sequel hot tub \n", "1 4 coronation duty marriage \n", "2 12 jazz obsession conservatory \n", "3 हिन्दी 7 mystery bollywood police corruption \n", "4 0 \n", "\n", " gender_0_number gender_1_number gender_2_number \n", "0 6 8 10 \n", "1 0 10 10 \n", "2 31 7 13 \n", "3 4 1 2 \n", "4 0 0 4 \n", "\n", "[5 rows x 39 columns]" ] }, "execution_count": 437, "metadata": {}, "output_type": "execute_result" } ], "source": [ "combined.head()" ] }, { "cell_type": "code", "execution_count": 438, "metadata": {}, "outputs": [], "source": [
 "# Number of entries in each movie's 'cast' value\n",
 "combined['cast_number'] = combined['cast'].apply(len)"
 ] }, { "cell_type": "code", "execution_count": 439, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0 24\n", "1 20\n", "2 51\n", "3 7\n", "4 4\n", "Name: cast_number, dtype: int64" ] }, "execution_count": 439, "metadata": {}, "output_type": "execute_result" } ], "source": [ "combined['cast_number'].head()" ] }, { "cell_type": "code", "execution_count": 440, "metadata": {}, "outputs": [], "source": [
 "# Parse the 'cast' value of one movie into five id columns.\n",
 "#\n",
 "# x: a string (treated as 'no data') or a list of dicts that each carry an\n",
 "#    'id' key.\n",
 "# Returns a pd.Series indexed 'cast1'..'cast5' holding the 'id' of the first\n",
 "# five entries in list order; slots without an entry hold -1.\n",
 "def parse_cast(x):\n",
 "    slots = ['cast1', 'cast2', 'cast3', 'cast4', 'cast5']\n",
 "    # number of entries to read: none for a string, at most one per slot\n",
 "    taken = 0 if type(x) == str else min(len(slots), len(x))\n",
 "    ids = [x[pos]['id'] for pos in range(taken)]\n",
 "    # -1 marks the slots that have no cast entry\n",
 "    ids.extend([-1] * (len(slots) - taken))\n",
 "    return pd.Series(ids, index=slots)"
 ] }, { "cell_type": "code", "execution_count": 441, "metadata": {}, "outputs": [], "source": [ "# Apply the function to create 5 new columns and drop the original 'cast' column\n", "combined[['cast1', 'cast2', 'cast3', 'cast4', 'cast5']] = combined['cast'].apply(parse_cast)\n", "combined.drop(columns='cast', inplace=True)" ] }, { "cell_type": "code", "execution_count": 442, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
belongs_to_collectionbudgethomepageoriginal_languageoverviewpopularityruntimestatustaglinecrew...key3gender_0_numbergender_1_numbergender_2_numbercast_numbercast1cast2cast3cast4cast5
01140000000en155.06.57539393.0Released1[{'credit_id': '59ac067c92514107af02c8c8', 'de......hot tub6810245299764342547293680154812
11400000000en393.08.248895113.0Released1[{'credit_id': '52fe43fe9251416c7502563d', 'de......marriage010102018135823121065533656
2033000001en130.064.299990105.0Released1[{'credit_id': '54d5356ec3a3683ba0000039', 'de......conservatory317135199670118999129104970216223012
3012000001hi581.03.174936122.0Released0[{'credit_id': '52fe48779251416c9108d6eb', 'de......police corruption412735068850471021524109364486033
4000ko168.01.148070118.0Released0[{'credit_id': '52fe464b9251416c75073b43', 'de......00448475164453847521130534-1
\n", "

5 rows × 44 columns

\n", "
" ], "text/plain": [ " belongs_to_collection budget homepage original_language overview \\\n", "0 1 14000000 0 en 155.0 \n", "1 1 40000000 0 en 393.0 \n", "2 0 3300000 1 en 130.0 \n", "3 0 1200000 1 hi 581.0 \n", "4 0 0 0 ko 168.0 \n", "\n", " popularity runtime status tagline \\\n", "0 6.575393 93.0 Released 1 \n", "1 8.248895 113.0 Released 1 \n", "2 64.299990 105.0 Released 1 \n", "3 3.174936 122.0 Released 0 \n", "4 1.148070 118.0 Released 0 \n", "\n", " crew ... \\\n", "0 [{'credit_id': '59ac067c92514107af02c8c8', 'de... ... \n", "1 [{'credit_id': '52fe43fe9251416c7502563d', 'de... ... \n", "2 [{'credit_id': '54d5356ec3a3683ba0000039', 'de... ... \n", "3 [{'credit_id': '52fe48779251416c9108d6eb', 'de... ... \n", "4 [{'credit_id': '52fe464b9251416c75073b43', 'de... ... \n", "\n", " key3 gender_0_number gender_1_number gender_2_number \\\n", "0 hot tub 6 8 10 \n", "1 marriage 0 10 10 \n", "2 conservatory 31 7 13 \n", "3 police corruption 4 1 2 \n", "4 0 0 4 \n", "\n", " cast_number cast1 cast2 cast3 cast4 cast5 \n", "0 24 52997 64342 54729 36801 54812 \n", "1 20 1813 5823 1210 655 33656 \n", "2 51 996701 18999 129104 970216 223012 \n", "3 7 35068 85047 1021524 1093644 86033 \n", "4 4 84751 64453 84752 1130534 -1 \n", "\n", "[5 rows x 44 columns]" ] }, "execution_count": 442, "metadata": {}, "output_type": "execute_result" } ], "source": [ "combined.head()" ] }, { "cell_type": "code", "execution_count": 443, "metadata": {}, "outputs": [], "source": [
 "# Number of entries in each movie's 'crew' value\n",
 "combined['crew_number'] = combined['crew'].apply(len)"
 ] }, { "cell_type": "code", "execution_count": 444, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0 72\n", "1 9\n", "2 64\n", "3 3\n", "4 2\n", "Name: crew_number, dtype: int64" ] }, "execution_count": 444, "metadata": {}, "output_type": "execute_result" } ], "source": [ "combined['crew_number'].head()" ] }, { "cell_type": "code", "execution_count": 445, "metadata": 
{}, "outputs": [], "source": [ "# Parse the Director and Producer ids from the 'crew' column\n", "def parse_crew(x):\n", "    \"\"\"Return the Director and Producer ids found in one 'crew' value.\n", "\n", "    x is iterated as a sequence of dicts that carry 'job' and 'id' keys.\n", "    Strings and non-iterable values (None, NaN) are treated as missing crew data.\n", "    Returns a pd.Series indexed ['Director', 'Producer'] where -1 marks a role\n", "    that was not found; if a role occurs several times the last entry wins.\n", "    \"\"\"\n", "    myindx = ['Director', 'Producer']\n", "    out = [-1] * 2\n", "    # isinstance also covers str subclasses; the __iter__ check keeps a float NaN\n", "    # or None from raising TypeError in the loop below.\n", "    if isinstance(x, str) or not hasattr(x, '__iter__'):\n", "        return pd.Series(out, index=myindx)\n", "    for item in x:\n", "        if item['job'] == 'Director':\n", "            out[0] = item['id']\n", "        elif item['job'] == 'Producer':\n", "            out[1] = item['id']\n", "    return pd.Series(out, index=myindx)" ] }, { "cell_type": "code", "execution_count": 446, "metadata": {}, "outputs": [], "source": [ "# Apply the function to create 2 new columns and drop the original 'crew' column\n", "combined[['Director', 'Producer']] = combined['crew'].apply(parse_crew)\n", "combined.drop(columns='crew', inplace=True)" ] }, { "cell_type": "code", "execution_count": 447, "metadata": {}, "outputs": [], "source": [ "# Create two new columns (features) for the two columns that contain Numeric Values ('budget', 'popularity')\n", "# using np.log1p (calculate log(1 + x)) since there is a possibility that we will have a zero value\n", "# and log of zero does not exist.\n", "# RandomForest or light_gbm models can use both features without a conflict,\n", "# moreover, these two new features contribute to the models' accuracy.\n", "combined['budget_log'] = np.log1p(combined['budget'])\n", "combined['pop_log'] = np.log1p(combined['popularity'])" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Apply LabelEncoder on the new generated columns, fit and transform as a second step" ] }, { "cell_type": "code", "execution_count": 448, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
genres1genres2genres3
0Comedy
1ComedyDramaFamily
2Drama
3ThrillerDrama
4ActionThriller
\n", "
" ], "text/plain": [ " genres1 genres2 genres3\n", "0 Comedy \n", "1 Comedy Drama Family\n", "2 Drama \n", "3 Thriller Drama \n", "4 Action Thriller " ] }, "execution_count": 448, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Check the 'genres1', 'genres2', 'genres3' columns before fitting and applying the label encoder\n", "combined[['genres1', 'genres2', 'genres3']].head()" ] }, { "cell_type": "code", "execution_count": 449, "metadata": {}, "outputs": [], "source": [ "cols = ['genres1', 'genres2', 'genres3']\n", "# One encoder is fitted on the distinct values of all three columns, so a genre\n", "# receives the same integer code whichever column it appears in.\n", "allitems = list(set(combined[cols].values.ravel().tolist()))\n", "labeler = LabelEncoder().fit(allitems)\n", "combined[cols] = combined[cols].apply(labeler.transform)" ] }, { "cell_type": "code", "execution_count": 450, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
genres1genres2genres3
0400
1478
2700
31870
41180
\n", "
" ], "text/plain": [ " genres1 genres2 genres3\n", "0 4 0 0\n", "1 4 7 8\n", "2 7 0 0\n", "3 18 7 0\n", "4 1 18 0" ] }, "execution_count": 450, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Check the 'genres1', 'genres2', 'genres3' columns after fitting and applying the label encoder\n", "combined[['genres1', 'genres2', 'genres3']].head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### The same process is applied to the other 4 generated feature groups" ] }, { "cell_type": "code", "execution_count": 451, "metadata": {}, "outputs": [], "source": [ "def encode_group(frame, cols):\n", "    \"\"\"Label-encode the columns `cols` of `frame` in place with one shared encoder.\n", "\n", "    The encoder is fitted on the distinct values of all listed columns, so a value\n", "    receives the same integer code in every column of the group.\n", "    Returns the fitted LabelEncoder.\n", "    \"\"\"\n", "    labeler = LabelEncoder()\n", "    labeler.fit(list(set(frame[cols].values.ravel().tolist())))\n", "    frame[cols] = frame[cols].apply(labeler.transform)\n", "    return labeler\n", "\n", "cols = ['prod1', 'prod2', 'prod3']\n", "labeler = encode_group(combined, cols)" ] }, { "cell_type": "code", "execution_count": 452, "metadata": {}, "outputs": [], "source": [ "cols = ['country1', 'country2', 'country3']\n", "labeler = encode_group(combined, cols)" ] }, { "cell_type": "code", "execution_count": 453, "metadata": {}, "outputs": [], "source": [ "cols = ['lang1', 'lang2', 'lang3']\n", "labeler = encode_group(combined, cols)" ] }, { "cell_type": "code", "execution_count": 454, "metadata": {}, "outputs": [], "source": [ "cols = ['key1', 'key2', 'key3']\n", "labeler = encode_group(combined, cols)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Apply LabelEncoder to the remaining categorical columns" ] }, { "cell_type": "code", "execution_count": 455, "metadata": {},
"outputs": [ { "data": { "text/plain": [ "Index(['original_language', 'status'], dtype='object')" ] }, "execution_count": 455, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Find the categorical (object-dtype) columns that are left\n", "combined.select_dtypes('object').columns" ] }, { "cell_type": "code", "execution_count": 456, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
belongs_to_collectionbudgethomepageoriginal_languageoverviewpopularityruntimestatustaglinerevenue...cast1cast2cast3cast4cast5crew_numberDirectorProducerbudget_logpop_log
01140000000en155.06.57539393.0Released112314651.0...52997643425472936801548127232275782216.4545682.024905
11400000000en393.08.248895113.0Released195149435.0...18135823121065533656912015997317.5043902.224504
2033000001en130.064.299990105.0Released113092000.0...9967011899912910497021622301264136495128796115.0094334.178992
3012000001hi581.03.174936122.0Released016000000.0...350688504710215241093644860333955109551013.9978331.429099
4000ko168.01.148070118.0Released03923970.0...8475164453847521130534-1284749-10.0000000.764570
\n", "

5 rows × 48 columns

\n", "
" ], "text/plain": [ " belongs_to_collection budget homepage original_language overview \\\n", "0 1 14000000 0 en 155.0 \n", "1 1 40000000 0 en 393.0 \n", "2 0 3300000 1 en 130.0 \n", "3 0 1200000 1 hi 581.0 \n", "4 0 0 0 ko 168.0 \n", "\n", " popularity runtime status tagline revenue ... cast1 \\\n", "0 6.575393 93.0 Released 1 12314651.0 ... 52997 \n", "1 8.248895 113.0 Released 1 95149435.0 ... 1813 \n", "2 64.299990 105.0 Released 1 13092000.0 ... 996701 \n", "3 3.174936 122.0 Released 0 16000000.0 ... 35068 \n", "4 1.148070 118.0 Released 0 3923970.0 ... 84751 \n", "\n", " cast2 cast3 cast4 cast5 crew_number Director Producer \\\n", "0 64342 54729 36801 54812 72 3227 57822 \n", "1 5823 1210 655 33656 9 1201 59973 \n", "2 18999 129104 970216 223012 64 136495 1287961 \n", "3 85047 1021524 1093644 86033 3 95510 95510 \n", "4 64453 84752 1130534 -1 2 84749 -1 \n", "\n", " budget_log pop_log \n", "0 16.454568 2.024905 \n", "1 17.504390 2.224504 \n", "2 15.009433 4.178992 \n", "3 13.997833 1.429099 \n", "4 0.000000 0.764570 \n", "\n", "[5 rows x 48 columns]" ] }, "execution_count": 456, "metadata": {}, "output_type": "execute_result" } ], "source": [ "combined.head()" ] }, { "cell_type": "code", "execution_count": 457, "metadata": {}, "outputs": [], "source": [ "# Label-encode the remaining object-dtype columns, each with its own encoder.\n", "# The work is done on a copy, so `combined` keeps its original string values.\n", "combined_dummy = combined.copy()\n", "cat_col = combined_dummy.select_dtypes('object').columns\n", "for col in cat_col:\n", "    combined_dummy[col] = LabelEncoder().fit_transform(combined_dummy[col])" ] }, { "cell_type": "code", "execution_count": 458, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
belongs_to_collectionbudgethomepageoriginal_languageoverviewpopularityruntimestatustaglinerevenue...cast1cast2cast3cast4cast5crew_numberDirectorProducerbudget_logpop_log
0114000000010155.06.57539393.01112314651.0...52997643425472936801548127232275782216.4545682.024905
1140000000010393.08.248895113.01195149435.0...18135823121065533656912015997317.5043902.224504
203300000110130.064.299990105.01113092000.0...9967011899912910497021622301264136495128796115.0094334.178992
301200000116581.03.174936122.01016000000.0...350688504710215241093644860333955109551013.9978331.429099
400024168.01.148070118.0103923970.0...8475164453847521130534-1284749-10.0000000.764570
\n", "

5 rows × 48 columns

\n", "
" ], "text/plain": [ " belongs_to_collection budget homepage original_language overview \\\n", "0 1 14000000 0 10 155.0 \n", "1 1 40000000 0 10 393.0 \n", "2 0 3300000 1 10 130.0 \n", "3 0 1200000 1 16 581.0 \n", "4 0 0 0 24 168.0 \n", "\n", " popularity runtime status tagline revenue ... cast1 cast2 \\\n", "0 6.575393 93.0 1 1 12314651.0 ... 52997 64342 \n", "1 8.248895 113.0 1 1 95149435.0 ... 1813 5823 \n", "2 64.299990 105.0 1 1 13092000.0 ... 996701 18999 \n", "3 3.174936 122.0 1 0 16000000.0 ... 35068 85047 \n", "4 1.148070 118.0 1 0 3923970.0 ... 84751 64453 \n", "\n", " cast3 cast4 cast5 crew_number Director Producer budget_log \\\n", "0 54729 36801 54812 72 3227 57822 16.454568 \n", "1 1210 655 33656 9 1201 59973 17.504390 \n", "2 129104 970216 223012 64 136495 1287961 15.009433 \n", "3 1021524 1093644 86033 3 95510 95510 13.997833 \n", "4 84752 1130534 -1 2 84749 -1 0.000000 \n", "\n", " pop_log \n", "0 2.024905 \n", "1 2.224504 \n", "2 4.178992 \n", "3 1.429099 \n", "4 0.764570 \n", "\n", "[5 rows x 48 columns]" ] }, "execution_count": 458, "metadata": {}, "output_type": "execute_result" } ], "source": [ "combined_dummy.head()" ] }, { "cell_type": "code", "execution_count": 459, "metadata": {}, "outputs": [], "source": [ "# Split the combined dataset back into the train and test sets.\n", "# tail(ntest) is used instead of iloc[-ntest:] because a slice that starts at -0\n", "# returns every row when ntest is 0, whereas tail(0) returns an empty frame.\n", "train_data = combined_dummy.iloc[:ntrain]\n", "test_data = combined_dummy.tail(ntest)" ] }, { "cell_type": "code", "execution_count": 460, "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
belongs_to_collectionbudgethomepageoriginal_languageoverviewpopularityruntimestatustaglinerevenue...cast1cast2cast3cast4cast5crew_numberDirectorProducerbudget_logpop_log
0114000000010155.06.57539393.01112314651.0...52997643425472936801548127232275782216.4545682.024905
1140000000010393.08.248895113.01195149435.0...18135823121065533656912015997317.5043902.224504
203300000110130.064.299990105.01113092000.0...9967011899912910497021622301264136495128796115.0094334.178992
301200000116581.03.174936122.01016000000.0...350688504710215241093644860333955109551013.9978331.429099
400024168.01.148070118.0103923970.0...8475164453847521130534-1284749-10.0000000.764570
\n", "

5 rows × 48 columns

\n", "
" ], "text/plain": [ " belongs_to_collection budget homepage original_language overview \\\n", "0 1 14000000 0 10 155.0 \n", "1 1 40000000 0 10 393.0 \n", "2 0 3300000 1 10 130.0 \n", "3 0 1200000 1 16 581.0 \n", "4 0 0 0 24 168.0 \n", "\n", " popularity runtime status tagline revenue ... cast1 cast2 \\\n", "0 6.575393 93.0 1 1 12314651.0 ... 52997 64342 \n", "1 8.248895 113.0 1 1 95149435.0 ... 1813 5823 \n", "2 64.299990 105.0 1 1 13092000.0 ... 996701 18999 \n", "3 3.174936 122.0 1 0 16000000.0 ... 35068 85047 \n", "4 1.148070 118.0 1 0 3923970.0 ... 84751 64453 \n", "\n", " cast3 cast4 cast5 crew_number Director Producer budget_log \\\n", "0 54729 36801 54812 72 3227 57822 16.454568 \n", "1 1210 655 33656 9 1201 59973 17.504390 \n", "2 129104 970216 223012 64 136495 1287961 15.009433 \n", "3 1021524 1093644 86033 3 95510 95510 13.997833 \n", "4 84752 1130534 -1 2 84749 -1 0.000000 \n", "\n", " pop_log \n", "0 2.024905 \n", "1 2.224504 \n", "2 4.178992 \n", "3 1.429099 \n", "4 0.764570 \n", "\n", "[5 rows x 48 columns]" ] }, "execution_count": 460, "metadata": {}, "output_type": "execute_result" } ], "source": [ "train_data.head()" ] }, { "cell_type": "code", "execution_count": 461, "metadata": {}, "outputs": [], "source": [ "# Optional: export to a CSV file to inspect the final clean dataset\n", "#train_data.to_csv('out_for_check.csv')" ] }, { "cell_type": "code", "execution_count": 462, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(3000, 48)" ] }, "execution_count": 462, "metadata": {}, "output_type": "execute_result" } ], "source": [ "train_data.shape" ] }, { "cell_type": "code", "execution_count": 463, "metadata": {}, "outputs": [], "source": [ "# Drop the 'revenue' column from the features: it holds the values to predict\n", "X_train = train_data.drop(columns='revenue').values\n", "\n", "# The log transformation of the revenue gives better results, so the models are trained on log1p(revenue)\n", "y_train = np.log1p(train_data['revenue'].values)\n", "\n", "# Drop the 'revenue' column, will be filled at 
the end when the model will be ready\n", "X_test = test_data.drop(columns='revenue').values" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Start with a basic Linear Regression Model" ] }, { "cell_type": "code", "execution_count": 464, "metadata": {}, "outputs": [], "source": [ "from sklearn.linear_model import LinearRegression\n", "from sklearn.model_selection import cross_val_score, KFold, cross_val_predict\n", "from sklearn.preprocessing import LabelEncoder\n", "from sklearn.metrics import mean_squared_error\n", "from sklearn.ensemble import RandomForestRegressor" ] }, { "cell_type": "code", "execution_count": 465, "metadata": {}, "outputs": [], "source": [ "kf = KFold(n_splits=5, shuffle=True, random_state=123)" ] }, { "cell_type": "code", "execution_count": 466, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "RMSLE: 2.24\n" ] } ], "source": [ "lr = LinearRegression()\n", "y_pred = cross_val_predict(lr, X_train, y_train, cv=kf)\n", "# y_train is log1p(revenue); negative out-of-fold predictions are clipped to 0\n", "y_pred[y_pred < 0 ] = 0\n", "\n", "# Both arrays are in log1p space, so the RMSE printed here is the competition's RMSLE\n", "print('RMSLE: {0:.2f}'.format(np.sqrt(mean_squared_error(y_train, y_pred))))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Continue with a random forest regression model (improved result compared to the LinearRegression attempt)" ] }, { "cell_type": "code", "execution_count": 467, "metadata": { "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "RMSLE: 2.19\n" ] } ], "source": [ "rf = RandomForestRegressor(max_depth=8, random_state=123, n_estimators=100)\n", "y_pred = cross_val_predict(rf, X_train, y_train, cv=kf)\n", "y_pred[y_pred < 0 ] = 0\n", "\n", "# Print the result in the Kaggle competition metric (RMSLE)\n", "print('RMSLE: {0:.2f}'.format(np.sqrt(mean_squared_error(y_train, y_pred))))" ] }, { "cell_type": "code", "execution_count": 468, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 468,
"metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "# Plot the feature importances of the random forest model as a horizontal bar chart.\n", "# The 'revenue' column is dropped so the index lists the same columns X_train was built from.\n", "rf.fit(X_train, y_train)\n", "imp = pd.Series(rf.feature_importances_, index=train_data.drop(columns='revenue').columns)\n", "imp.sort_values(ascending=False).plot(kind='barh', figsize=(8,10))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Continue with an LGBMRegressor model (fast execution); the results improved compared to the RandomForestRegressor attempt" ] }, { "cell_type": "code", "execution_count": 469, "metadata": {}, "outputs": [], "source": [ "import lightgbm as lgb" ] }, { "cell_type": "code", "execution_count": 470, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "RMSLE: 2.10\n" ] } ], "source": [ "# Explanation of the parameters of this model:\n", "# feature_fraction=0.4 means that for each of the 1500 trees (n_estimators) only 40% of the features\n", "# will be selected (randomly).\n", "# max_depth=-1 means no depth limit; tree size is restricted by the number of leaves (20)\n", "lgb_model = lgb.LGBMRegressor(num_leaves=20, max_depth=-1, learning_rate=0.01, \n", "                              metrics='rmse', n_estimators=1500, feature_fraction = 0.4)\n", "\n", "y_pred = cross_val_predict(lgb_model, X_train, y_train, cv=kf)\n", "\n", "# Print the result in the Kaggle competition metric (RMSLE)\n", "print('RMSLE: {0:.2f}'.format(np.sqrt(mean_squared_error(y_train, y_pred))))" ] }, { "cell_type": "code", "execution_count": 471, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 471, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "# Plot the feature importances of the LGBMRegressor model as a horizontal bar chart.\n", "# The 'revenue' column is dropped so the index lists the same columns X_train was built from.\n", "# According to this model, the year is the most important feature in predicting the revenue,\n", "# which makes sense: as the years pass, the revenue increases (across all industries).\n", "# The next most important features according to this model are the production company, budget, director...\n", "# The choices of this model are relevant and lead to a better prediction outcome compared to\n", "# the previous two models that I tried.\n", "\n", "lgb_model.fit(X_train, y_train)\n", "imp = pd.Series(lgb_model.feature_importances_, index=train_data.drop(columns='revenue').columns)\n", "imp.sort_values(ascending=False).plot(kind='barh', figsize=(8,10))" ] }, { "cell_type": "code", "execution_count": 115, "metadata": {}, "outputs": [], "source": [ "# Prepare the submission file in the format required by the Kaggle website\n", "pred = lgb_model.predict(X_test)\n", "sub = pd.DataFrame()\n", "sub['id'] = test['id']\n", "# The model predicts log1p(revenue). Negative predictions are clipped to 0 before the\n", "# inverse transform (as in the LinearRegression / RandomForest evaluation cells), so the\n", "# submitted revenue can never be negative.\n", "sub['revenue'] = np.expm1(np.clip(pred, 0, None))\n", "sub.to_csv('mysubmission2.csv', index=False)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.1" } }, "nbformat": 4, "nbformat_minor": 2 }