Moved from imdb to omdb
This commit is contained in:
parent
1dd8f9b5ea
commit
17f76da220
@ -1,168 +0,0 @@
|
|||||||
using NadekoBot.Extensions;
|
|
||||||
using NadekoBot.Modules.Searches.Models;
|
|
||||||
using System;
|
|
||||||
using System.Collections.Generic;
|
|
||||||
using System.Linq;
|
|
||||||
using System.Net.Http;
|
|
||||||
using System.Text.RegularExpressions;
|
|
||||||
using System.Threading.Tasks;
|
|
||||||
|
|
||||||
/*******************************************************************************
|
|
||||||
* Free ASP.net IMDb Scraper API for the new IMDb Template.
|
|
||||||
* Author: Abhinay Rathore
|
|
||||||
* Website: http://www.AbhinayRathore.com
|
|
||||||
* Blog: http://web3o.blogspot.com
|
|
||||||
* More Info: http://web3o.blogspot.com/2010/11/aspnetc-imdb-scraping-api.html
|
|
||||||
|
|
||||||
* Updated By: Gergo Torcsvari
|
|
||||||
* Last Updated: Feb, 2016
|
|
||||||
*******************************************************************************/
|
|
||||||
|
|
||||||
namespace NadekoBot.Modules.Searches.IMDB
|
|
||||||
{
|
|
||||||
public static class ImdbScraper
|
|
||||||
{
|
|
||||||
// Google search URL prefix; the URL-escaped movie title is appended to it to build the query.
// Never reassigned, so it is readonly.
private static readonly string GoogleSearch = "https://www.google.com/search?q=imdb+";
|
|
||||||
/// <summary>
/// Looks up a movie by title: resolves the title to an IMDb URL through
/// <c>GetIMDbUrlAsync</c> and, when a URL is found, fills the result via <c>ParseIMDbPage</c>.
/// </summary>
/// <param name="MovieName">Free-text movie title. A null or blank title yields a result with <c>Status == false</c>.</param>
/// <param name="GetExtraInfo">Forwarded unchanged to <c>ParseIMDbPage</c>.</param>
/// <returns>
/// A new <c>ImdbMovie</c>. <c>Status</c> is initialised to false here and is only changed by the page parser.
/// </returns>
public static async Task<ImdbMovie> ImdbScrape(string MovieName, bool GetExtraInfo = true)
{
    ImdbMovie mov = new ImdbMovie();
    mov.Status = false;

    // Nothing to search for; also avoids the ArgumentNullException the escape call throws on null.
    if (string.IsNullOrWhiteSpace(MovieName))
    {
        return mov;
    }

    // The title becomes part of a query-string value, so reserved characters such as
    // '&', '+', '#' and '?' must be percent-encoded. EscapeUriString leaves them intact,
    // which truncates or alters the query; EscapeDataString encodes them.
    string imdbUrl = await GetIMDbUrlAsync(System.Uri.EscapeDataString(MovieName));
    if (!string.IsNullOrWhiteSpace(imdbUrl))
    {
        await ParseIMDbPage(imdbUrl, GetExtraInfo, mov);
    }

    return mov;
}
|
|
||||||
|
|
||||||
/// <summary>
/// Scrapes a movie directly from its IMDb title id, skipping the search step.
/// </summary>
/// <param name="imdbId">Title id that is inserted into <c>http://www.imdb.com/title/{id}/</c>.</param>
/// <param name="GetExtraInfo">Forwarded unchanged to <c>ParseIMDbPage</c>.</param>
/// <returns>A new <c>ImdbMovie</c> whose <c>Status</c> starts as false and is only changed by the page parser.</returns>
public static async Task<ImdbMovie> ImdbScrapeFromId(string imdbId, bool GetExtraInfo = true)
{
    var movie = new ImdbMovie();
    movie.Status = false;

    string titleUrl = string.Concat("http://www.imdb.com/title/", imdbId, "/");
    await ParseIMDbPage(titleUrl, GetExtraInfo, movie);

    return movie;
}
|
|
||||||
|
|
||||||
/// <summary>
/// Resolves a movie title to an IMDb title id by looking up the title's IMDb URL
/// through <c>GetIMDbUrlAsync</c> and extracting the <c>tt</c> + seven digits part.
/// </summary>
/// <param name="MovieName">Free-text movie title.</param>
/// <returns>The captured title id, or an empty string when the URL contains no such id.</returns>
public static async Task<string> GetIMDBId(string MovieName)
{
    // The title is used as a query-string value: EscapeDataString percent-encodes reserved
    // characters ('&', '+', '#', '?') that EscapeUriString would pass through unescaped.
    string imdbUrl = await GetIMDbUrlAsync(System.Uri.EscapeDataString(MovieName));
    return Match(@"http://www.imdb.com/title/(tt\d{7})", imdbUrl);
}
|
|
||||||
/// <summary>
/// Runs the Google query for the given (already URL-escaped) title and returns the first
/// IMDb title link found in the result page.
/// </summary>
/// <param name="MovieName">Escaped title, appended verbatim to the search URL prefix.</param>
/// <returns>The first matching <c>http://www.imdb.com/title/tt…/</c> URL, or an empty string when none is present.</returns>
private static async Task<string> GetIMDbUrlAsync(string MovieName)
{
    string searchPage = await GetUrlDataAsync(GoogleSearch + MovieName);
    List<string> hits = MatchAll(@"<a href=""(http://www.imdb.com/title/tt\d{7}/)"".*?>.*?</a>", searchPage);

    if (hits.Count == 0)
    {
        return String.Empty;
    }

    return hits[0];
}
|
|
||||||
/// <summary>
/// Downloads the "combined" view of an IMDb title page and copies the scraped fields onto
/// <paramref name="mov"/>.
/// </summary>
/// <param name="imdbUrl">Title URL ending in '/'; "combined" is appended to it.</param>
/// <param name="GetExtraInfo">When true, <c>GetReleaseDatesAndAka</c> is also run for the movie.</param>
/// <param name="mov">Movie object to populate. If no title id is found in the page, only <c>Id</c> is written (as an empty string) and <c>Status</c> is left untouched.</param>
private static async Task ParseIMDbPage(string imdbUrl, bool GetExtraInfo, ImdbMovie mov)
{
    string html = await GetUrlDataAsync(imdbUrl + "combined");

    // The canonical link is the marker that this really is a title page.
    mov.Id = Match(@"<link rel=""canonical"" href=""http://www.imdb.com/title/(tt\d{7})/combined"" />", html);
    if (string.IsNullOrEmpty(mov.Id))
    {
        return;
    }

    mov.Status = true;
    mov.Title = Match(@"<title>(IMDb \- )*(.*?) \(.*?</title>", html, 2);
    mov.OriginalTitle = Match(@"title-extra"">(.*?)<", html);
    // The year is taken from inside the parentheses of the <title> element only.
    mov.Year = Match(@"<title>.*?\(.*?(\d{4}).*?\).*?</title>", Match(@"(<title>.*?</title>)", html));
    mov.Rating = Match(@"<b>(\d.\d)/10</b>", html);
    // MatchAll already returns a fresh List<string>; no cast or copy is needed.
    mov.Genres = MatchAll(@"<a.*?>(.*?)</a>", Match(@"Genre.?:((.|\n)*?)(<\/div>|See more)", html));
    mov.Plot = Match(@"Plot:</h5>\n<div class=""info-content"">\n((.|\n)*?)(<a|</div)", html);
    mov.Poster = Match(@"<a name=""poster"".*src=""(.*)""", html);
    mov.ImdbURL = "http://www.imdb.com/title/" + mov.Id + "/";

    if (GetExtraInfo)
    {
        // The "plotsummary" page used to be downloaded here but its content was never read,
        // costing an extra HTTP round trip (and an extra failure point) per lookup.
        await GetReleaseDatesAndAka(mov);
    }
}
|
|
||||||
/// <summary>
/// Downloads the title's "releaseinfo" page and stores the alternative titles (AKAs) found
/// in its <c>akas</c> table on <paramref name="mov"/>.
/// </summary>
/// <remarks>
/// Release dates are not extracted: nothing on the movie object receives them, so parsing
/// the <c>release_dates</c> table was dead work.
/// </remarks>
/// <param name="mov">Movie whose <c>Id</c> selects the page and whose <c>Aka</c> is overwritten.</param>
private static async Task GetReleaseDatesAndAka(ImdbMovie mov)
{
    string releasehtml = await GetUrlDataAsync("http://www.imdb.com/title/" + mov.Id + "/releaseinfo");

    Dictionary<string, string> aka = new Dictionary<string, string>();

    // Built once instead of once per table row.
    Regex akaCells = new Regex(@"\n*?.*?<td>(.*?)</td>\n*?.*?<td>(.*?)</td>", RegexOptions.Multiline);
    string akaTable = Match(@"<table id=""akas"" class=.*?>\n*?(.*?)</table>", releasehtml);

    foreach (string row in MatchAll(@".*?<tr class="".*?"">(.*?)</tr>", akaTable))
    {
        Match cells = akaCells.Match(row);
        if (!cells.Success)
        {
            // A row without two <td> cells would otherwise add a meaningless "" -> "" entry.
            continue;
        }

        // First cell is used as the key, second as the value; a repeated key keeps the last value.
        aka[StripHTML(cells.Groups[1].Value.Trim())] = StripHTML(cells.Groups[2].Value.Trim());
    }

    mov.Aka = aka;
}
|
|
||||||
/// <summary>
/// Collects image URLs from every page of the title's "mediaindex" gallery.
/// </summary>
/// <param name="mov">Movie whose <c>Id</c> selects the gallery.</param>
/// <returns>
/// One URL per thumbnail <c>src</c> attribute found, with the size suffix after <c>_V1.</c>
/// replaced by <c>_SY0.jpg</c>.
/// </returns>
private static async Task<List<string>> GetMediaImages(ImdbMovie mov)
{
    string galleryUrl = "http://www.imdb.com/title/" + mov.Id + "/mediaindex";

    // The first download is only used to count the pager links.
    string pageHtml = await GetUrlDataAsync(galleryUrl);
    string pager = Match(@"<span style=""padding: 0 1em;"">(.*?)</span>", pageHtml);
    int lastPage = MatchAll(@"<a href=""\?page=(.*?)"">", pager).Count + 1;

    Regex srcAttribute = new Regex(@"src=""(.*?)""", RegexOptions.Multiline);
    List<string> images = new List<string>();

    int page = 1;
    while (page <= lastPage)
    {
        pageHtml = await GetUrlDataAsync(galleryUrl + "?page=" + page);
        string thumbs = Match(@"<div class=""thumb_list"" style=""font-size: 0px;"">(.*?)</div>", pageHtml);

        foreach (Match hit in srcAttribute.Matches(thumbs))
        {
            string thumbnailUrl = hit.Groups[1].Value;
            images.Add(Regex.Replace(thumbnailUrl, @"_V1\..*?.jpg", "_V1._SY0.jpg"));
        }

        page++;
    }

    return images;
}
|
|
||||||
/// <summary>
/// Queries IMDb's recommendations widget endpoint for the movie and extracts the recommended titles.
/// </summary>
/// <param name="mov">Movie whose <c>Id</c> is appended to the widget URL.</param>
/// <returns>Every value captured from an escaped <c>title=\"…\"</c> attribute in the response; empty when there are none.</returns>
private static async Task<List<string>> GetRecommendedTitlesAsync(ImdbMovie mov)
{
    string recUrl = "http://www.imdb.com/widget/recommendations/_ajax/get_more_recs?specs=p13nsims%3A" + mov.Id;
    string json = await GetUrlDataAsync(recUrl);

    // The pattern matches the backslash-escaped quotes as they appear in the raw response text.
    return MatchAll(@"title=\\""(.*?)\\""", json);
}
|
|
||||||
/*******************************[ Helper Methods ]********************************/
|
|
||||||
/// <summary>
/// Returns one capture group of the first match of <paramref name="regex"/> in <paramref name="html"/>.
/// </summary>
/// <remarks>
/// The pattern runs with <c>RegexOptions.Multiline</c>, which only changes how <c>^</c> and <c>$</c>
/// anchor; <c>.</c> still does not match a newline.
/// </remarks>
/// <param name="regex">Pattern to search for.</param>
/// <param name="html">Text to search in.</param>
/// <param name="i">Index of the capture group to return (defaults to the first group).</param>
/// <returns>The trimmed group value, or an empty string when the pattern or the group did not match.</returns>
private static string Match(string regex, string html, int i = 1)
{
    Group captured = Regex.Match(html, regex, RegexOptions.Multiline).Groups[i];
    string text = captured.Value;
    return text.Trim();
}
|
|
||||||
/// <summary>
/// Returns one capture group from every match of <paramref name="regex"/> in <paramref name="html"/>.
/// </summary>
/// <param name="regex">Pattern to search for; evaluated with <c>RegexOptions.Multiline</c>.</param>
/// <param name="html">Text to search in.</param>
/// <param name="i">Index of the capture group to collect (defaults to the first group).</param>
/// <returns>A new list with the trimmed group value of each match, in match order; empty when nothing matched.</returns>
private static List<string> MatchAll(string regex, string html, int i = 1)
{
    MatchCollection hits = Regex.Matches(html, regex, RegexOptions.Multiline);
    return hits
        .Cast<Match>()
        .Select(hit => hit.Groups[i].Value.Trim())
        .ToList();
}
|
|
||||||
/// <summary>
/// Removes every <c>&lt;…&gt;</c> sequence from the input, leaving only the text between tags.
/// </summary>
/// <param name="inputString">Markup fragment to clean.</param>
/// <returns>The input with each shortest <c>&lt;…&gt;</c> span deleted.</returns>
private static string StripHTML(string inputString)
{
    Regex tag = new Regex(@"<.*?>");
    string textOnly = tag.Replace(inputString, string.Empty);
    return textOnly;
}
|
|
||||||
/// <summary>
/// Downloads the body of <paramref name="url"/> as a string using a short-lived <c>HttpClient</c>.
/// </summary>
/// <param name="url">Address to request.</param>
/// <returns>The response body returned by <c>GetStringAsync</c>.</returns>
private static async Task<string> GetUrlDataAsync(string url)
{
    HttpClient client = new HttpClient();
    try
    {
        // Project extension method; presumably sets browser-like request headers — confirm in NadekoBot.Extensions.
        client.AddFakeHeaders();
        string body = await client.GetStringAsync(url);
        return body;
    }
    finally
    {
        client.Dispose();
    }
}
|
|
||||||
}
|
|
||||||
}
|
|
@ -12,11 +12,11 @@ using System.Text.RegularExpressions;
|
|||||||
using System.Net;
|
using System.Net;
|
||||||
using Discord.WebSocket;
|
using Discord.WebSocket;
|
||||||
using NadekoBot.Modules.Searches.Models;
|
using NadekoBot.Modules.Searches.Models;
|
||||||
using NadekoBot.Modules.Searches.IMDB;
|
|
||||||
using System.Collections.Generic;
|
using System.Collections.Generic;
|
||||||
using ImageProcessorCore;
|
using ImageProcessorCore;
|
||||||
using NadekoBot.Extensions;
|
using NadekoBot.Extensions;
|
||||||
using System.IO;
|
using System.IO;
|
||||||
|
using NadekoBot.Modules.Searches.Commands.OMDB;
|
||||||
|
|
||||||
namespace NadekoBot.Modules.Searches
|
namespace NadekoBot.Modules.Searches
|
||||||
{
|
{
|
||||||
@ -66,10 +66,6 @@ $@"🌍 **Weather for** 【{obj["target"]}】
|
|||||||
await channel.SendMessageAsync(result).ConfigureAwait(false);
|
await channel.SendMessageAsync(result).ConfigureAwait(false);
|
||||||
}
|
}
|
||||||
|
|
||||||
//todo move to omdb
|
|
||||||
// |
|
|
||||||
// v
|
|
||||||
//{"Title":"Shutter Island","Year":"2010","Rated":"R","Released":"19 Feb 2010","Runtime":"138 min","Genre":"Mystery, Thriller","Director":"Martin Scorsese","Writer":"Laeta Kalogridis (screenplay), Dennis Lehane (novel)","Actors":"Leonardo DiCaprio, Mark Ruffalo, Ben Kingsley, Max von Sydow","Plot":"In 1954, a U.S. marshal investigates the disappearance of a murderess who escaped from a hospital for the criminally insane.","Language":"English, German","Country":"USA","Awards":"8 wins & 59 nominations.","Poster":"https://images-na.ssl-images-amazon.com/images/M/MV5BMTMxMTIyNzMxMV5BMl5BanBnXkFtZTcwOTc4OTI3Mg@@._V1_SX300.jpg","Metascore":"63","imdbRating":"8.1","imdbVotes":"798,447","imdbID":"tt1130884","Type":"movie","Response":"True"}
|
|
||||||
[NadekoCommand, Usage, Description, Aliases]
|
[NadekoCommand, Usage, Description, Aliases]
|
||||||
[RequireContext(ContextType.Guild)]
|
[RequireContext(ContextType.Guild)]
|
||||||
public async Task Imdb(IUserMessage umsg, [Remainder] string query = null)
|
public async Task Imdb(IUserMessage umsg, [Remainder] string query = null)
|
||||||
@ -78,21 +74,14 @@ $@"🌍 **Weather for** 【{obj["target"]}】
|
|||||||
|
|
||||||
if (!(await ValidateQuery(channel, query).ConfigureAwait(false))) return;
|
if (!(await ValidateQuery(channel, query).ConfigureAwait(false))) return;
|
||||||
await umsg.Channel.TriggerTypingAsync().ConfigureAwait(false);
|
await umsg.Channel.TriggerTypingAsync().ConfigureAwait(false);
|
||||||
string result;
|
|
||||||
try
|
var movie = await OmdbProvider.FindMovie(query);
|
||||||
{
|
if (movie == null)
|
||||||
var movie = await ImdbScraper.ImdbScrape(query, true);
|
|
||||||
if (movie.Status) result = movie.ToString();
|
|
||||||
else result = "Failed to find that movie.";
|
|
||||||
}
|
|
||||||
catch (Exception ex)
|
|
||||||
{
|
{
|
||||||
await channel.SendMessageAsync("Failed to find that movie.").ConfigureAwait(false);
|
await channel.SendMessageAsync("Failed to find that movie.").ConfigureAwait(false);
|
||||||
_log.Warn(ex);
|
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
await channel.SendMessageAsync(movie.ToString()).ConfigureAwait(false);
|
||||||
await channel.SendMessageAsync(result.ToString()).ConfigureAwait(false);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
[NadekoCommand, Usage, Description, Aliases]
|
[NadekoCommand, Usage, Description, Aliases]
|
||||||
|
@ -3111,7 +3111,7 @@ namespace NadekoBot.Resources {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// <summary>
|
/// <summary>
|
||||||
/// Looks up a localized string similar to imdb.
|
/// Looks up a localized string similar to imdb omdb.
|
||||||
/// </summary>
|
/// </summary>
|
||||||
public static string imdb_cmd {
|
public static string imdb_cmd {
|
||||||
get {
|
get {
|
||||||
@ -3120,7 +3120,7 @@ namespace NadekoBot.Resources {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// <summary>
|
/// <summary>
|
||||||
/// Looks up a localized string similar to Queries imdb for movies or series, show first result..
|
/// Looks up a localized string similar to Queries omdb for movies or series, show first result..
|
||||||
/// </summary>
|
/// </summary>
|
||||||
public static string imdb_desc {
|
public static string imdb_desc {
|
||||||
get {
|
get {
|
||||||
|
@ -2044,10 +2044,10 @@
|
|||||||
<value>`~ani aquarion evol`</value>
|
<value>`~ani aquarion evol`</value>
|
||||||
</data>
|
</data>
|
||||||
<data name="imdb_cmd" xml:space="preserve">
|
<data name="imdb_cmd" xml:space="preserve">
|
||||||
<value>imdb</value>
|
<value>imdb omdb</value>
|
||||||
</data>
|
</data>
|
||||||
<data name="imdb_desc" xml:space="preserve">
|
<data name="imdb_desc" xml:space="preserve">
|
||||||
<value>Queries imdb for movies or series, show first result.</value>
|
<value>Queries omdb for movies or series, show first result.</value>
|
||||||
</data>
|
</data>
|
||||||
<data name="imdb_usage" xml:space="preserve">
|
<data name="imdb_usage" xml:space="preserve">
|
||||||
<value>`~imdb Batman vs Superman`</value>
|
<value>`~imdb Batman vs Superman`</value>
|
||||||
|
Loading…
Reference in New Issue
Block a user