Moved from imdb to omdb

This commit is contained in:
Kwoth 2016-10-15 23:11:45 +02:00
parent 1dd8f9b5ea
commit 17f76da220
4 changed files with 9 additions and 188 deletions

View File

@ -1,168 +0,0 @@
using NadekoBot.Extensions;
using NadekoBot.Modules.Searches.Models;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Net.Http;
using System.Text.RegularExpressions;
using System.Threading.Tasks;
/*******************************************************************************
* Free ASP.net IMDb Scraper API for the new IMDb Template.
* Author: Abhinay Rathore
* Website: http://www.AbhinayRathore.com
* Blog: http://web3o.blogspot.com
* More Info: http://web3o.blogspot.com/2010/11/aspnetc-imdb-scraping-api.html
* Updated By: Gergo Torcsvari
* Last Updated: Feb, 2016
*******************************************************************************/
namespace NadekoBot.Modules.Searches.IMDB
{
public static class ImdbScraper
{
//Search engine URL prefix; the (already escaped) movie name is appended to it.
//Never reassigned inside this class, so it is declared readonly.
private static readonly string GoogleSearch = "https://www.google.com/search?q=imdb+";
//Entry point: look a movie up by name.
//Resolves the name to an IMDb title URL through a Google search and, when a URL
//is found, scrapes that page into the returned ImdbMovie.
//  MovieName    - free-text title; it is escaped here before being put in the query.
//  GetExtraInfo - forwarded unchanged to ParseIMDbPage.
//Returns a movie whose Status is false when no IMDb URL was found or when
//ParseIMDbPage could not extract an id.
public static async Task<ImdbMovie> ImdbScrape(string MovieName, bool GetExtraInfo = true)
{
    ImdbMovie mov = new ImdbMovie();
    //Pessimistic default; ParseIMDbPage sets it to true once an id is extracted.
    mov.Status = false;

    //EscapeDataString also escapes reserved characters ('&', '+', '#', '='), which
    //EscapeUriString leaves alone. Without this a title such as "Fast & Furious"
    //would terminate the q= parameter early and search for the wrong text.
    string imdbUrl = await GetIMDbUrlAsync(System.Uri.EscapeDataString(MovieName));
    if (!string.IsNullOrWhiteSpace(imdbUrl))
    {
        await ParseIMDbPage(imdbUrl, GetExtraInfo, mov);
    }
    return mov;
}
//Scrape a movie whose IMDb id is already known, skipping the Google lookup.
//  imdbId       - inserted verbatim into http://www.imdb.com/title/<id>/.
//  GetExtraInfo - forwarded unchanged to ParseIMDbPage.
//Returns the populated movie; Status stays false unless ParseIMDbPage sets it.
public static async Task<ImdbMovie> ImdbScrapeFromId(string imdbId, bool GetExtraInfo = true)
{
    string titleUrl = "http://www.imdb.com/title/" + imdbId + "/";
    ImdbMovie result = new ImdbMovie();
    result.Status = false;
    await ParseIMDbPage(titleUrl, GetExtraInfo, result);
    return result;
}
//Resolve a movie name to its IMDb title id ("tt" followed by seven digits).
//Returns an empty string when the search produced no IMDb title URL.
public static async Task<string> GetIMDBId(string MovieName)
{
    //EscapeDataString (not EscapeUriString) so reserved characters such as '&'
    //and '+' in the title stay inside the search query instead of altering it.
    string imdbUrl = await GetIMDbUrlAsync(System.Uri.EscapeDataString(MovieName));
    return Match(@"http://www.imdb.com/title/(tt\d{7})", imdbUrl);
}
//Run the Google search for the given (already escaped) movie name and return the
//first IMDb title link found in the result page, or String.Empty when there is none.
private static async Task<string> GetIMDbUrlAsync(string MovieName)
{
    string searchPage = await GetUrlDataAsync(GoogleSearch + MovieName);
    List<string> hits = MatchAll(@"<a href=""(http://www.imdb.com/title/tt\d{7}/)"".*?>.*?</a>", searchPage);
    return hits.Count > 0 ? hits[0] : String.Empty;
}
//Parse IMDb page data.
//Downloads "<imdbUrl>combined" and fills mov from it with regular expressions.
//  imdbUrl      - title URL ending in '/', e.g. http://www.imdb.com/title/tt1130884/
//  GetExtraInfo - when true, GetReleaseDatesAndAka is also run for the movie.
//  mov          - object to populate; left untouched (Status not set to true) when the
//                 canonical-link id cannot be found in the page.
private static async Task ParseIMDbPage(string imdbUrl, bool GetExtraInfo, ImdbMovie mov)
{
    string html = await GetUrlDataAsync(imdbUrl + "combined");

    mov.Id = Match(@"<link rel=""canonical"" href=""http://www.imdb.com/title/(tt\d{7})/combined"" />", html);
    if (string.IsNullOrEmpty(mov.Id))
    {
        //No id means this is not a recognisable title page; nothing else to extract.
        return;
    }

    mov.Status = true;
    //Group 2 is the title text; group 1 is the optional "IMDb - " prefix.
    mov.Title = Match(@"<title>(IMDb \- )*(.*?) \(.*?</title>", html, 2);
    mov.OriginalTitle = Match(@"title-extra"">(.*?)<", html);
    //The year is taken from the parenthesised part of the <title> element.
    mov.Year = Match(@"<title>.*?\(.*?(\d{4}).*?\).*?</title>", Match(@"(<title>.*?</title>)", html));
    mov.Rating = Match(@"<b>(\d.\d)/10</b>", html);
    //MatchAll already returns a List<string>, so no Cast/ToList copy is needed.
    mov.Genres = MatchAll(@"<a.*?>(.*?)</a>", Match(@"Genre.?:((.|\n)*?)(<\/div>|See more)", html));
    mov.Plot = Match(@"Plot:</h5>\n<div class=""info-content"">\n((.|\n)*?)(<a|</div)", html);
    mov.Poster = Match(@"<a name=""poster"".*src=""(.*)""", html);
    mov.ImdbURL = "http://www.imdb.com/title/" + mov.Id + "/";

    if (GetExtraInfo)
    {
        //The "plotsummary" page used to be downloaded here as well, but its content
        //was never read; that request was dropped to save a round-trip and to avoid
        //failing the whole scrape if it errored.
        await GetReleaseDatesAndAka(mov);
    }
}
//Get the aka-s (alternate titles) from the title's "releaseinfo" page and store
//them in mov.Aka, keyed by the first table cell with the second cell as value
//(both with HTML tags stripped).
//Release dates are NOT stored: the assignment to mov.ReleaseDates was commented
//out, so the release-date table parsing that only fed it has been removed.
//NOTE(review): the helper Match uses RegexOptions.Multiline, under which '.' does
//not match '\n'; the "akas" table pattern therefore only matches when the table
//markup contains no line breaks after the leading ones - confirm against real pages.
private static async Task GetReleaseDatesAndAka(ImdbMovie mov)
{
    string releasehtml = await GetUrlDataAsync("http://www.imdb.com/title/" + mov.Id + "/releaseinfo");

    string akaTable = Match(@"<table id=""akas"" class=.*?>\n*?(.*?)</table>", releasehtml);
    //Built once instead of once per row.
    Regex cellPattern = new Regex(@"\n*?.*?<td>(.*?)</td>\n*?.*?<td>(.*?)</td>", RegexOptions.Multiline);

    Dictionary<string, string> aka = new Dictionary<string, string>();
    foreach (string row in MatchAll(@".*?<tr class="".*?"">(.*?)</tr>", akaTable))
    {
        var cells = cellPattern.Match(row);
        if (!cells.Success)
        {
            //A row without two <td> cells would otherwise add an empty-key entry.
            continue;
        }
        aka[StripHTML(cells.Groups[1].Value.Trim())] = StripHTML(cells.Groups[2].Value.Trim());
    }
    mov.Aka = aka;
}
//Get all media images.
//Reads the title's "mediaindex" page to count the pager links, then walks pages
//1 .. (link count + 1), collecting every src="..." found inside the thumb_list div.
//Each image URL has its "_V1.<anything>.jpg" suffix rewritten to "_V1._SY0.jpg".
private static async Task<List<string>> GetMediaImages(ImdbMovie mov)
{
    string mediaurl = "http://www.imdb.com/title/" + mov.Id + "/mediaindex";
    string indexHtml = await GetUrlDataAsync(mediaurl);

    string pager = Match(@"<span style=""padding: 0 1em;"">(.*?)</span>", indexHtml);
    int lastPage = MatchAll(@"<a href=""\?page=(.*?)"">", pager).Count + 1;

    Regex srcPattern = new Regex(@"src=""(.*?)""", RegexOptions.Multiline);
    List<string> images = new List<string>();
    int page = 1;
    while (page <= lastPage)
    {
        string pageHtml = await GetUrlDataAsync(mediaurl + "?page=" + page);
        string thumbs = Match(@"<div class=""thumb_list"" style=""font-size: 0px;"">(.*?)</div>", pageHtml);
        foreach (Match thumb in srcPattern.Matches(thumbs))
        {
            string src = thumb.Groups[1].Value;
            images.Add(Regex.Replace(src, @"_V1\..*?.jpg", "_V1._SY0.jpg"));
        }
        page++;
    }
    return images;
}
//Get recommended titles.
//Calls IMDb's recommendations widget endpoint for mov.Id and returns every value
//found in an escaped title=\"...\" attribute of the response text.
private static async Task<List<string>> GetRecommendedTitlesAsync(ImdbMovie mov)
{
    //The previously allocated, never-used result list was removed; MatchAll
    //already builds and returns the list.
    string recUrl = "http://www.imdb.com/widget/recommendations/_ajax/get_more_recs?specs=p13nsims%3A" + mov.Id;
    string json = await GetUrlDataAsync(recUrl);
    return MatchAll(@"title=\\""(.*?)\\""", json);
}
/*******************************[ Helper Methods ]********************************/
//Match single instance.
//Returns the trimmed text of capture group i (default 1) from the first match of
//regex in html. When nothing matches, the group value is empty, so "" is returned.
private static string Match(string regex, string html, int i = 1)
{
    Group captured = Regex.Match(html, regex, RegexOptions.Multiline).Groups[i];
    return captured.Value.Trim();
}
//Match all instances and return as List<string>.
//For every match of regex in html, takes capture group i (default 1), trims it and
//appends it to the result in match order. No matches yields an empty list.
private static List<string> MatchAll(string regex, string html, int i = 1)
{
    MatchCollection found = Regex.Matches(html, regex, RegexOptions.Multiline);
    List<string> values = new List<string>();
    for (int k = 0; k < found.Count; k++)
    {
        values.Add(found[k].Groups[i].Value.Trim());
    }
    return values;
}
//Strip HTML tags.
//Removes every shortest "<...>" run from inputString and returns what is left.
private static string StripHTML(string inputString)
{
    Regex tagPattern = new Regex(@"<.*?>");
    return tagPattern.Replace(inputString, string.Empty);
}
//Get URL data.
//Issues a GET for url with a fresh HttpClient (after AddFakeHeaders has been
//applied to it) and returns the response body as a string.
//NOTE(review): a new HttpClient is created and disposed on every call; consider
//sharing one instance if this path is ever called frequently.
private static async Task<string> GetUrlDataAsync(string url)
{
    using (HttpClient client = new HttpClient())
    {
        client.AddFakeHeaders();
        string body = await client.GetStringAsync(url);
        return body;
    }
}
}
}

View File

@ -12,11 +12,11 @@ using System.Text.RegularExpressions;
using System.Net; using System.Net;
using Discord.WebSocket; using Discord.WebSocket;
using NadekoBot.Modules.Searches.Models; using NadekoBot.Modules.Searches.Models;
using NadekoBot.Modules.Searches.IMDB;
using System.Collections.Generic; using System.Collections.Generic;
using ImageProcessorCore; using ImageProcessorCore;
using NadekoBot.Extensions; using NadekoBot.Extensions;
using System.IO; using System.IO;
using NadekoBot.Modules.Searches.Commands.OMDB;
namespace NadekoBot.Modules.Searches namespace NadekoBot.Modules.Searches
{ {
@ -66,10 +66,6 @@ $@"🌍 **Weather for** 【{obj["target"]}】
await channel.SendMessageAsync(result).ConfigureAwait(false); await channel.SendMessageAsync(result).ConfigureAwait(false);
} }
//todo move to omdb
// |
// v
//{"Title":"Shutter Island","Year":"2010","Rated":"R","Released":"19 Feb 2010","Runtime":"138 min","Genre":"Mystery, Thriller","Director":"Martin Scorsese","Writer":"Laeta Kalogridis (screenplay), Dennis Lehane (novel)","Actors":"Leonardo DiCaprio, Mark Ruffalo, Ben Kingsley, Max von Sydow","Plot":"In 1954, a U.S. marshal investigates the disappearance of a murderess who escaped from a hospital for the criminally insane.","Language":"English, German","Country":"USA","Awards":"8 wins & 59 nominations.","Poster":"https://images-na.ssl-images-amazon.com/images/M/MV5BMTMxMTIyNzMxMV5BMl5BanBnXkFtZTcwOTc4OTI3Mg@@._V1_SX300.jpg","Metascore":"63","imdbRating":"8.1","imdbVotes":"798,447","imdbID":"tt1130884","Type":"movie","Response":"True"}
[NadekoCommand, Usage, Description, Aliases] [NadekoCommand, Usage, Description, Aliases]
[RequireContext(ContextType.Guild)] [RequireContext(ContextType.Guild)]
public async Task Imdb(IUserMessage umsg, [Remainder] string query = null) public async Task Imdb(IUserMessage umsg, [Remainder] string query = null)
@ -78,21 +74,14 @@ $@"🌍 **Weather for** 【{obj["target"]}】
if (!(await ValidateQuery(channel, query).ConfigureAwait(false))) return; if (!(await ValidateQuery(channel, query).ConfigureAwait(false))) return;
await umsg.Channel.TriggerTypingAsync().ConfigureAwait(false); await umsg.Channel.TriggerTypingAsync().ConfigureAwait(false);
string result;
try var movie = await OmdbProvider.FindMovie(query);
{ if (movie == null)
var movie = await ImdbScraper.ImdbScrape(query, true);
if (movie.Status) result = movie.ToString();
else result = "Failed to find that movie.";
}
catch (Exception ex)
{ {
await channel.SendMessageAsync("Failed to find that movie.").ConfigureAwait(false); await channel.SendMessageAsync("Failed to find that movie.").ConfigureAwait(false);
_log.Warn(ex);
return; return;
} }
await channel.SendMessageAsync(movie.ToString()).ConfigureAwait(false);
await channel.SendMessageAsync(result.ToString()).ConfigureAwait(false);
} }
[NadekoCommand, Usage, Description, Aliases] [NadekoCommand, Usage, Description, Aliases]

View File

@ -3111,7 +3111,7 @@ namespace NadekoBot.Resources {
} }
/// <summary> /// <summary>
/// Looks up a localized string similar to imdb. /// Looks up a localized string similar to imdb omdb.
/// </summary> /// </summary>
public static string imdb_cmd { public static string imdb_cmd {
get { get {
@ -3120,7 +3120,7 @@ namespace NadekoBot.Resources {
} }
/// <summary> /// <summary>
/// Looks up a localized string similar to Queries imdb for movies or series, show first result.. /// Looks up a localized string similar to Queries omdb for movies or series, show first result..
/// </summary> /// </summary>
public static string imdb_desc { public static string imdb_desc {
get { get {

View File

@ -2044,10 +2044,10 @@
<value>`~ani aquarion evol`</value> <value>`~ani aquarion evol`</value>
</data> </data>
<data name="imdb_cmd" xml:space="preserve"> <data name="imdb_cmd" xml:space="preserve">
<value>imdb</value> <value>imdb omdb</value>
</data> </data>
<data name="imdb_desc" xml:space="preserve"> <data name="imdb_desc" xml:space="preserve">
<value>Queries imdb for movies or series, show first result.</value> <value>Queries omdb for movies or series, show first result.</value>
</data> </data>
<data name="imdb_usage" xml:space="preserve"> <data name="imdb_usage" xml:space="preserve">
<value>`~imdb Batman vs Superman`</value> <value>`~imdb Batman vs Superman`</value>