Spaces:
Sleeping
Sleeping
| import pandas as pd | |
| import numpy as np | |
| from sklearn.metrics.pairwise import cosine_similarity | |
| from sklearn.model_selection import train_test_split | |
| from sklearn.feature_extraction.text import TfidfVectorizer | |
| import requests | |
| import gradio as gr | |
| import os | |
# ---------------------------------------------------------------------------
# Data loading and one-time model construction (executed at import time).
# ---------------------------------------------------------------------------
ratings = pd.read_csv("ratings.csv")
movies = pd.read_csv("movies.csv")

# Optional OMDb key; get_movie_poster() returns '' when it is unset.
OMDB_API_KEY = os.environ.get("omdbapikey")

# movieId -> title, and lower-cased title -> movieId for case-insensitive lookup.
movie_lookup = movies.set_index("movieId")["title"].to_dict()
reverse_movie_lookup = {
    title.lower(): movie_id for movie_id, title in movie_lookup.items()
}

# --- Collaborative filtering: similarities are built from the 80% train split.
train_df, test_df = train_test_split(ratings, test_size=0.2, random_state=42)
train_matrix = train_df.pivot_table(
    values='rating', index='userId', columns='movieId'
)
# Missing ratings become 0 so cosine similarity can be computed on dense rows.
train_matrix_filled = train_matrix.fillna(0)

user_similarity = cosine_similarity(train_matrix_filled)
user_similarity_df = pd.DataFrame(
    user_similarity,
    index=train_matrix_filled.index,
    columns=train_matrix_filled.index,
)

item_rating_matrix = train_matrix_filled.T
item_similarity = cosine_similarity(item_rating_matrix)
item_similarity_df = pd.DataFrame(
    item_similarity,
    index=item_rating_matrix.index,
    columns=item_rating_matrix.index,
)

# --- Content-based filtering: TF-IDF over each movie's genre string.
# NOTE: the vectorizer is fitted on the ratings/movies join, i.e. one row per
# rating, and then reduced to the first row seen for each distinct movieId.
data = pd.merge(ratings, movies, on='movieId')
data['genres'] = data['genres'].fillna('')
vectorizer = TfidfVectorizer(token_pattern=r'[a-zA-Z0-9\-]+')
tfidf_matrix = vectorizer.fit_transform(data['genres'].values)

movie_ids = data['movieId'].values
unique_movie_ids, indices = np.unique(movie_ids, return_index=True)
# movieId -> row position inside movie_genre_matrix.
movie_id_to_index = dict(zip(unique_movie_ids, range(len(unique_movie_ids))))
movie_genre_matrix = tfidf_matrix[indices]
def get_movie_poster(title):
    """Return a poster URL for *title* from the OMDb API, or '' if unavailable.

    An empty string is returned when no API key is configured, when the HTTP
    request fails or times out, when the response body is not JSON, or when
    the response carries no usable poster.
    """
    if not OMDB_API_KEY:
        return ''
    try:
        # `params` lets requests URL-encode the title, so characters such as
        # '&', '#' or '+' no longer corrupt the query string. HTTPS keeps the
        # API key out of plaintext traffic; the timeout stops a slow API call
        # from blocking the chat reply indefinitely.
        response = requests.get(
            "https://www.omdbapi.com/",
            params={"t": title, "apikey": OMDB_API_KEY},
            timeout=5,
        )
        payload = response.json()
    except (requests.RequestException, ValueError):
        # Network failure, timeout, or a non-JSON body: posters are optional.
        return ''
    if not isinstance(payload, dict):
        return ''
    poster = payload.get('Poster') or ''
    # OMDb uses the literal string "N/A" for a missing poster; treat it as
    # absent so callers do not render it as an image URL.
    return '' if poster == 'N/A' else poster
def user_cf_recommend(user_id):
    """Recommend up to five movies for a user via user-based collaborative filtering.

    The ten users most similar to *user_id* (by ``user_similarity_df``) vote on
    every movie the target user has no rating for in ``train_matrix``; each
    movie's predicted rating is the similarity-weighted mean of the neighbours'
    ratings.

    Args:
        user_id: Anything ``int()`` accepts (e.g. ``42`` or ``"42"``).

    Returns:
        A list of ``(title, predicted_rating, poster_url)`` tuples sorted by
        predicted rating (highest first), or an error string: "Invalid input."
        when *user_id* is not convertible to int, "User ID not found." when
        the user is absent from the similarity matrix.
    """
    # Only the conversion is guarded, so genuine bugs further down are not
    # silently reported to the user as "Invalid input.".
    try:
        user_id = int(user_id)
    except (TypeError, ValueError, OverflowError):
        return "Invalid input."
    if user_id not in user_similarity_df.index:
        return "User ID not found."

    neighbours = (
        user_similarity_df[user_id]
        .drop(user_id)
        .sort_values(ascending=False)
        .head(10)
    )

    # Fetch the target user's row once; movies without a rating are candidates.
    own_ratings = train_matrix.loc[user_id]
    unrated = set(own_ratings.index[own_ratings.isna()])

    scores = {}
    sim_sums = {}
    for other_user, similarity in neighbours.items():
        other_ratings = train_matrix.loc[other_user].dropna()
        for movie_id, rating in other_ratings.items():
            if movie_id not in unrated:
                continue
            scores[movie_id] = scores.get(movie_id, 0) + similarity * rating
            sim_sums[movie_id] = sim_sums.get(movie_id, 0) + abs(similarity)

    # Skip movies whose neighbours all have zero similarity (division by zero).
    predictions = [
        (movie_id, total / sim_sums[movie_id])
        for movie_id, total in scores.items()
        if sim_sums[movie_id] > 0
    ]
    ranked_movies = sorted(predictions, key=lambda pair: pair[1], reverse=True)[:5]

    result = []
    for movie_id, score in ranked_movies:
        title = movie_lookup.get(movie_id, 'Unknown')
        result.append((title, round(score, 2), get_movie_poster(title)))
    return result
def item_cf_recommend(movie_title):
    """Return the five movies whose rating patterns are closest to *movie_title*.

    The title is matched case-insensitively after trimming whitespace.

    Returns:
        A list of ``(title, poster_url)`` tuples ordered from most to least
        similar, or "Movie not found." / "No similarity data available."
        when the title is unknown or has no column in ``item_similarity_df``.
    """
    key = movie_title.lower().strip()
    try:
        target_movie_id = reverse_movie_lookup[key]
    except KeyError:
        return "Movie not found."

    if target_movie_id not in item_similarity_df:
        return "No similarity data available."

    # Rank every other movie by similarity to the target and keep the best five.
    ranked = (
        item_similarity_df[target_movie_id]
        .drop(target_movie_id)
        .sort_values(ascending=False)
    )
    titles = (movie_lookup.get(mid, 'Unknown') for mid in ranked.head(5).index)
    return [(title, get_movie_poster(title)) for title in titles]
def cb_recommend(movie_title):
    """Recommend up to five movies whose genre TF-IDF vectors resemble *movie_title*.

    The title is matched case-insensitively after trimming whitespace.

    Returns:
        A list of ``(title, poster_url)`` tuples ordered by descending genre
        similarity (the queried title itself and repeated titles are skipped),
        or "Movie not found." / "No genre data available." when the title is
        unknown or has no row in ``movie_genre_matrix``.
    """
    movie_title = movie_title.strip().lower()

    # Build the lower-cased title column once instead of rewriting the shared
    # `movies` frame on every call.
    if 'title_lower' not in movies.columns:
        movies['title_lower'] = movies['title'].str.lower()

    matches = movies.index[movies['title_lower'] == movie_title]
    if len(matches) == 0:
        return "Movie not found."
    movie_id = movies.loc[matches[0], 'movieId']
    if movie_id not in movie_id_to_index:
        return "No genre data available."

    input_vec = movie_genre_matrix[movie_id_to_index[movie_id]]
    sims = cosine_similarity(input_vec, movie_genre_matrix).flatten()

    seen = set()
    result = []
    # Walk candidates from most to least similar.
    for position in sims.argsort()[::-1]:
        # Dict lookup replaces a full DataFrame scan per candidate.
        title = movie_lookup.get(unique_movie_ids[position], 'Unknown')
        if title.lower() == movie_title or title in seen:
            continue
        seen.add(title)
        result.append((title, get_movie_poster(title)))
        if len(result) == 5:
            break
    return result
def format_recommendations(recommendations):
    """Render recommender output as an HTML snippet for the chat window.

    Args:
        recommendations: Either an error/status string (returned unchanged) or
            an iterable of ``(title, score, poster)`` / ``(title, poster)``
            tuples as produced by the recommender functions.

    Returns:
        One ``<div>`` card per recommendation joined by ``<br>``; an empty
        iterable yields ``""``. Cards with a truthy poster URL include a
        thumbnail image, and 3-tuples additionally show the predicted rating.
    """
    from html import escape  # local import: keeps this block self-contained

    if isinstance(recommendations, str):
        return recommendations

    cards = []
    for item in recommendations:
        if len(item) == 3:
            title, score, poster = item
            caption = f"<b>{escape(str(title), quote=False)}</b><br>Predicted rating: {score}"
        else:
            title, poster = item
            caption = f"<b>{escape(str(title), quote=False)}</b>"

        if poster:
            # The poster URL comes from an external API and sits inside a
            # single-quoted attribute, so quotes must be escaped as well.
            cards.append(
                "<div style='display:flex;margin-bottom:10px;'>"
                f"<img src='{escape(str(poster), quote=True)}' "
                "style='width:80px;height:120px;object-fit:cover;margin-right:10px;'>"
                f"<div>{caption}</div></div>"
            )
        else:
            cards.append(f"<div>{caption}</div>")
    return "<br>".join(cards)
def respond(message, history):
    """Route a chat message to the matching recommender and format its output.

    Recognised commands (case-insensitive, surrounding whitespace ignored):
    ``recommend for user <ID>``, ``similar to <title>`` and
    ``recommend like <title>``. Anything else returns the help text.

    Args:
        message: The user's chat message.
        history: Prior conversation supplied by the chat UI; unused.

    Returns:
        An HTML/text string to display in the chat.
    """
    user_prefix = "recommend for user"
    similar_prefix = "similar to"
    like_prefix = "recommend like"

    message = message.lower().strip()

    if message.startswith(user_prefix):
        # Guard only the ID parsing so that failures inside the recommender
        # are not misreported as a bad user ID.
        try:
            user_id = int(message.split()[-1])
        except ValueError:
            return "Please provide a valid user ID after 'recommend for user'"
        return format_recommendations(user_cf_recommend(user_id))

    if message.startswith(similar_prefix):
        movie_title = message[len(similar_prefix):].strip()
        return format_recommendations(item_cf_recommend(movie_title))

    if message.startswith(like_prefix):
        movie_title = message[len(like_prefix):].strip()
        return format_recommendations(cb_recommend(movie_title))

    return "Available commands:\n1. 'recommend for user [ID]'\n2. 'similar to [Movie Title]'\n3. 'recommend like [Movie Title]'"
# Chat UI wiring: the description and example prompts mirror the three
# commands that respond() understands.
_chat_description = (
    "Ask for recommendations using these commands:\n"
    "1. 'recommend for user [ID]'\n"
    "2. 'similar to [Movie Title]'\n"
    "3. 'recommend like [Movie Title]'"
)
_example_prompts = [
    ["recommend for user 42"],
    ["similar to Toy Story"],
    ["recommend like The Dark Knight"],
]

demo = gr.ChatInterface(
    respond,
    title="Movie Recommendation Chatbot",
    description=_chat_description,
    examples=_example_prompts,
)
# Start the web server as soon as the script is executed.
demo.launch()