Spaces:

seyia92coding
/

Simple-Text-based-Gaming-Recommender

Runtime error

App Files Files Community

Simple-Text-based-Gaming-Recommender / app.py

seyia92coding

Update app.py

5bcbd08 almost 4 years ago

raw

history blame

6.5 kB

	# -*- coding: utf-8 -*-
	"""HS_Text-based_Recom_Metacritic.ipynb

	Automatically generated by Colaboratory.

	Original file is located at
	https://colab.research.google.com/drive/1MmWRwRJT04GVAO2SKCpwSqQ2bWghVGtQ
	"""

	import pandas as pd
	import numpy as np
	from fuzzywuzzy import fuzz
	from sklearn.feature_extraction.text import TfidfVectorizer
	from sklearn.metrics.pairwise import cosine_similarity

	# Load the critic reviews, dropping malformed CSV rows instead of aborting.
	# `error_bad_lines` was deprecated in pandas 1.3 and removed in 2.0; its
	# replacement is `on_bad_lines`. "warn" mirrors the old behaviour of
	# error_bad_lines=False with the default warn_bad_lines=True (skip the row
	# and emit a warning).
	try:
	    df = pd.read_csv("Metacritic_Reviews_Only.csv", on_bad_lines="warn", encoding='utf-8')
	except TypeError:
	    # Older pandas (< 1.3) rejects the unknown `on_bad_lines` keyword.
	    df = pd.read_csv("Metacritic_Reviews_Only.csv", error_bad_lines=False, encoding='utf-8')

	#Remove title from review
	def remove_title(row):
	    """Return the review text of *row* with the game's own title removed.

	    Args:
	        row: A mapping-like object (e.g. a DataFrame row) providing the
	            'Game Title' and 'Reviews' keys.

	    Returns:
	        The review with every occurrence of the title replaced by an empty
	        string. If the review is not a string (e.g. a missing value) it is
	        returned unchanged, and if the title is not a non-empty string the
	        review is returned as-is.
	    """
	    game_title = row['Game Title']
	    body_text = row['Reviews']
	    # Missing reviews arrive as NaN (a float) and have no .replace(); pass
	    # them through untouched so a later dropna() can still discard them.
	    if not isinstance(body_text, str):
	        return body_text
	    # str.replace() raises TypeError for a non-string pattern.
	    if not isinstance(game_title, str) or not game_title:
	        return body_text
	    return body_text.replace(game_title, "")

	#drop redundant column
	df = df.drop(['Unnamed: 0'], axis=1)

	# Drop null reviews BEFORE cleaning the text: a missing review is a float NaN
	# and cannot be processed as a string. Resetting the index afterwards keeps
	# row labels equal to row positions (0..n-1), which the positional lookups
	# further down rely on.
	df = df.dropna().reset_index(drop=True)

	# Strip each game's own title out of its review text
	df['Reviews'] = df.apply(remove_title, axis=1)

	# Instantiate the vectorizer object to the vectorizer variable
	#Minimum word count 2 to be included, words that appear in over 70% of docs should not be included
	vectorizer = TfidfVectorizer(min_df=2, max_df=0.7)

	# Fit and transform the review column
	vectorized_data = vectorizer.fit_transform(df['Reviews'])

	# `get_feature_names` was removed in scikit-learn 1.2; `get_feature_names_out`
	# (added in 1.0) is its replacement. Fall back for older installations.
	if hasattr(vectorizer, "get_feature_names_out"):
	    feature_names = vectorizer.get_feature_names_out()
	else:
	    feature_names = vectorizer.get_feature_names()

	# Create Dataframe from TF-IDF array
	tfidf_df = pd.DataFrame(vectorized_data.toarray(), columns=feature_names)

	# Assign the game titles to the index
	tfidf_df.index = df['Game Title']

	# Find the cosine similarity measures between all games and assign the results to cosine_similarity_array.
	cosine_similarity_array = cosine_similarity(tfidf_df)

	# Create a DataFrame from the cosine_similarity_array with tfidf_df.index as its rows and columns.
	cosine_similarity_df = pd.DataFrame(cosine_similarity_array, index=tfidf_df.index, columns=tfidf_df.index)

	# create a function to find the closest title
	def matching_score(a, b):
	    """Score how closely string *a* matches string *b*.

	    Thin wrapper around ``fuzz.ratio``; the value it returns is passed back
	    unchanged. Per the accompanying note, identical strings score 100.
	    """
	    similarity = fuzz.ratio(a, b)
	    return similarity
	# exactly the same, the score becomes 100

	#Convert index to title_year
	def get_title_from_index(index):
	    """Return the 'Game Title' of the first row of ``df`` whose index label equals *index*.

	    Raises IndexError when no row carries that label.
	    """
	    row_mask = df.index == index
	    matching_titles = df.loc[row_mask, 'Game Title']
	    return matching_titles.values[0]

	# A function to return the most similar title to the words a user type
	# Without this, the recommender only works when a user enters the exact title which the data has.
	def find_closest_title(title):
	    """Find the game title in ``df`` that best matches the text a user typed.

	    Args:
	        title: Free-text game name entered by the user.

	    Returns:
	        A ``(closest_title, distance_score)`` tuple: the best-matching
	        'Game Title' and the score ``matching_score`` gave it. On ties the
	        earliest row wins.
	    """
	    titles = df['Game Title']
	    # matching_score(a, b): a is the current row's title, b is the user's text
	    scores = titles.apply(matching_score, b=title)
	    # Pick the best row by POSITION and read the title by position too.
	    # Looking the position up as an index label is wrong once rows have been
	    # dropped from df, because labels and positions then no longer agree.
	    best_position = int(scores.values.argmax())
	    closest_title = titles.iloc[best_position]
	    distance_score = scores.iloc[best_position]
	    return closest_title, distance_score
	# Bejeweled Twist, 100

	#find_closest_title('Batman Arkham Knight')

	"""# Build Recommender Function

	Our recommender function takes three game titles you enjoyed, three exclusion keywords and the maximum number of results to return. The keyword exclusions were added when I realised that the recommendations were returning a lot of DLCs and sequels, which doesn't make for a very useful recommender.


	By combining everything we've done from building the user profile onwards we will pull out the Top 5 games we want to recommend.


	1. Text Match the closest title in the dataset
	2. Assign number for the final ranking
	3. Create your user profile based on previous games
	4. Create TFIDF subset without previously mentioned titles
	5. Calculate cosine similarity based on selected titles and convert back into DataFrame
	6. Sort DataFrame by similarity
	7. Return the most similar game titles that don't contain any of the keywords
	"""

	def recommend_games(game1, game2, game3, keyword1, keyword2, keyword3, max_results):
	    """Recommend games whose reviews resemble those of three games the user enjoyed.

	    Each entered name is first resolved to the closest title in the dataset.
	    The TF-IDF rows of those three titles are averaged into a user profile,
	    which is compared by cosine similarity against every other game.

	    Args:
	        game1, game2, game3: Names of games the user enjoyed (free text).
	        keyword1, keyword2, keyword3: Case-insensitive substrings; any title
	            containing one of them is skipped. Blank keywords are ignored.
	        max_results: Maximum number of recommendations to produce.

	    Returns:
	        A DataFrame with the columns "Rank", "Game Title" and "Match (%)",
	        ordered from most to least similar. The same information is also
	        printed to stdout.
	    """
	    #Insert closest title here
	    title1, _ = find_closest_title(game1)
	    title2, _ = find_closest_title(game2)
	    title3, _ = find_closest_title(game3)
	    print('Recommended because you played {}, {} and {}:\n'.format(title1, title2, title3))

	    # User profile: mean TF-IDF vector of the games the user enjoyed
	    list_of_games_enjoyed = [title1, title2, title3]
	    user_prof = tfidf_df.reindex(list_of_games_enjoyed).mean()

	    # Compare the profile against every game except the ones already played
	    tfidf_subset_df = tfidf_df.drop(list_of_games_enjoyed, axis=0)
	    similarity_array = cosine_similarity(user_prof.values.reshape(1, -1), tfidf_subset_df)
	    similarity_df = pd.DataFrame(similarity_array.T, index=tfidf_subset_df.index, columns=["similarity_score"])

	    # Sort the values from high to low by the values in the similarity_score
	    sorted_similarity_df = similarity_df.sort_values(by="similarity_score", ascending=False)

	    # Inspect the most similar to the user preferences
	    print("Without Keywords Exclusions:")
	    print(sorted_similarity_df.head())
	    print("\n")
	    print("With Keywords Exclusions:\n ")

	    # A blank keyword must be ignored: "" is a substring of every title, so
	    # keeping it would filter out every single recommendation.
	    excluded = [kw.lower() for kw in (keyword1, keyword2, keyword3) if kw and kw.strip()]
	    limit = int(max_results)

	    recommendations = []
	    # Iterate title and score together so a score can never be attributed to
	    # a different row than the title it belongs to.
	    for game_title, score in sorted_similarity_df["similarity_score"].items():
	        if len(recommendations) >= limit:
	            break  # enough results; no need to scan the remaining titles
	        lowered_title = str(game_title).lower()
	        if any(kw in lowered_title for kw in excluded):
	            continue
	        rank = len(recommendations) + 1
	        match_pct = round(score * 100, 2)
	        print("#" + str(rank) + ": " + str(game_title) + ", " + str(match_pct) + "% " + "match")
	        recommendations.append((rank, game_title, match_pct))

	    # Return the table as well as printing it: a caller expecting a dataframe
	    # (such as a UI output component) gets nothing from a function that only prints.
	    return pd.DataFrame(recommendations, columns=["Rank", "Game Title", "Match (%)"])


	# recommend_games('Mortal Kombat', 'Street Fighter', 'Overwatch', 'Kombat', 'Fighter', 'Overwatch', 5)

	import gradio as gr

	# `gr.inputs.Slider` belongs to the legacy `gr.inputs` namespace, which was
	# removed in Gradio 4; use the top-level `gr.Slider` component when the
	# installed version provides it and fall back to the legacy class otherwise.
	slider_component = getattr(gr, "Slider", None) or gr.inputs.Slider

	# Web UI: three game names, three exclusion keywords and a result-count slider
	# feed recommend_games; its result is rendered as a dataframe.
	recommender_interface = gr.Interface(
	    fn=recommend_games,
	    inputs=["text", "text", "text", "text", "text", "text", slider_component(1, 20, step=1)],
	    title="Text-based Recommendation Engine for Video Games",
	    description="""This is a Recommendation Engine based on the review texts of Metacritic critics for games between 2011-2019.
	You need to enter 3 games you've enjoyed playing followed by 3 keywords from those game titles so that I can avoid recommending the same games to you.""",
	    examples=[
	        ['Mortal Kombat', 'Street Fighter', 'Overwatch', 'Kombat', 'Fighter', 'Overwatch', 5],
	        ["Batman Arkham Knight", "Dying Light", "Left 4 Dead", "Batman", "Dying", "Left", 10],
	        ["Mario Kart", "Zelda", "Final Fantasy", "Mario", "Zelda", "Final", 7],
	    ],
	    outputs=["dataframe"],
	)

	# Start the web server (debug=True as in the original configuration).
	recommender_interface.launch(debug=True)