add csfloat api usage
This commit is contained in:
21
BlueLaminate/BlueLaminate.Scraper/Proxies/IProxyProvider.cs
Normal file
21
BlueLaminate/BlueLaminate.Scraper/Proxies/IProxyProvider.cs
Normal file
@@ -0,0 +1,21 @@
|
||||
namespace BlueLaminate.Scraper.Proxies;
|
||||
|
||||
/// <summary>
/// Abstraction over anything that can hand out proxy endpoints. Scraper and
/// browser code depend only on this contract and on <see cref="ProxyLease"/>,
/// which is what allows another residential provider, the planned C2 that
/// allocates IPs to containers, or a composite "grab-bag" spanning several
/// providers to be substituted without touching browser or scraping code.
/// </summary>
public interface IProxyProvider
{
    /// <summary>
    /// Short provider identifier stamped onto every lease this provider
    /// issues, for example "iproyal".
    /// </summary>
    string Name { get; }

    /// <summary>
    /// Builds a ready-to-use endpoint that satisfies <paramref name="request"/>.
    /// Gateway-style providers do this by string composition alone, without a
    /// network call; a later C2-backed implementation may perform real
    /// allocation instead.
    /// </summary>
    /// <param name="request">The kind of exit IP the caller is asking for.</param>
    /// <returns>A lease carrying the endpoint and credentials to use.</returns>
    ProxyLease Acquire(ProxyRequest request);
}
|
||||
@@ -0,0 +1,70 @@
|
||||
namespace BlueLaminate.Scraper.Proxies;
|
||||
|
||||
/// <summary>
/// <see cref="IProxyProvider"/> for IPRoyal's residential gateway. IPRoyal keeps
/// one fixed host/port (geo.iproyal.com:12321) and encodes everything else —
/// country, sticky-session id, session lifetime — as underscore-delimited
/// parameters appended to the account password. Example password:
/// "secret_country-us_session-ab12cd34_lifetime-30m". The account username is
/// sent unchanged. Docs: https://docs.iproyal.com/proxies/residential/proxy
/// </summary>
public sealed class IpRoyalProxyProvider : IProxyProvider
{
    public const string GatewayHost = "geo.iproyal.com";
    public const int GatewayPort = 12321;

    // IPRoyal documents the session value as an alphanumeric string of exactly
    // eight characters, so generated ids are cut to that length.
    private const int SessionIdLength = 8;

    // IPRoyal documents a maximum sticky-session lifetime of seven days.
    private const int MaxLifetimeMinutes = 7 * 24 * 60;

    // 30 minutes is a safe default that comfortably covers a single scrape
    // pass without forcing an early IP rotation.
    private static readonly TimeSpan DefaultLifetime = TimeSpan.FromMinutes(30);

    private readonly string _username;
    private readonly string _password;

    /// <summary>Creates a provider bound to one IPRoyal account.</summary>
    /// <param name="username">Account username; sent to the gateway unchanged.</param>
    /// <param name="password">Account password; request parameters are appended to it per lease.</param>
    /// <exception cref="ArgumentException">
    /// <paramref name="username"/> or <paramref name="password"/> is null, empty or whitespace.
    /// </exception>
    public IpRoyalProxyProvider(string username, string password)
    {
        if (string.IsNullOrWhiteSpace(username))
        {
            throw new ArgumentException("IPRoyal username is required.", nameof(username));
        }

        if (string.IsNullOrWhiteSpace(password))
        {
            throw new ArgumentException("IPRoyal password is required.", nameof(password));
        }

        _username = username;
        _password = password;
    }

    /// <inheritdoc />
    public string Name => "iproyal";

    /// <summary>
    /// Composes a lease for the fixed IPRoyal gateway by appending the
    /// requested country and, for sticky requests, the session id and
    /// lifetime to the account password. No network call is made.
    /// </summary>
    /// <param name="request">Desired country, stickiness, session id and lifetime.</param>
    /// <returns>
    /// A lease for <see cref="GatewayHost"/>:<see cref="GatewayPort"/>. For
    /// sticky requests it carries the session id and an expiry computed from
    /// the (clamped) lifetime; otherwise both are null.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="request"/> is null.</exception>
    public ProxyLease Acquire(ProxyRequest request)
    {
        if (request is null)
        {
            throw new ArgumentNullException(nameof(request));
        }

        var password = _password;
        string? sessionId = null;
        DateTimeOffset? expiresAt = null;

        // Country first; the router picks one at random when several are listed.
        if (!string.IsNullOrWhiteSpace(request.Country))
        {
            password += $"_country-{request.Country.Trim().ToLowerInvariant()}";
        }

        if (request.Sticky)
        {
            // A blank caller-supplied id would yield "_session-_lifetime-…",
            // so it is treated the same as "not supplied". A non-blank id is
            // passed through verbatim.
            sessionId = string.IsNullOrWhiteSpace(request.SessionId)
                ? NewSessionId()
                : request.SessionId;

            var lifetime = request.Lifetime ?? DefaultLifetime;

            // IPRoyal expresses lifetime with a single unit (e.g. "_lifetime-30m").
            // Clamp while still a double so an oversized TimeSpan cannot
            // overflow the int conversion, and keep the value within the
            // documented 1-minute-to-7-day window used here.
            var minutes = (int)Math.Clamp(Math.Round(lifetime.TotalMinutes), 1d, MaxLifetimeMinutes);

            password += $"_session-{sessionId}_lifetime-{minutes}m";
            expiresAt = DateTimeOffset.UtcNow.AddMinutes(minutes);
        }

        return new ProxyLease(
            Host: GatewayHost,
            Port: GatewayPort,
            Username: _username,
            Password: password,
            Provider: Name,
            SessionId: sessionId,
            ExpiresAt: expiresAt);
    }

    // Short, URL/param-safe token: the first hex digits of a fresh GUID. It
    // only needs to be stable for the duration of a sticky lease.
    private static string NewSessionId() =>
        Guid.NewGuid().ToString("N")[..SessionIdLength];
}
|
||||
29
BlueLaminate/BlueLaminate.Scraper/Proxies/ProxyLease.cs
Normal file
29
BlueLaminate/BlueLaminate.Scraper/Proxies/ProxyLease.cs
Normal file
@@ -0,0 +1,29 @@
|
||||
namespace BlueLaminate.Scraper.Proxies;
|
||||
|
||||
/// <summary>
/// A fully resolved proxy endpoint as returned by an
/// <see cref="IProxyProvider"/>. The browser layer only ever deals with this
/// type, so changing providers (or combining several in a grab-bag) leaves the
/// Selenium code untouched. <see cref="Username"/> and <see cref="Password"/>
/// are presented to the gateway verbatim; providers such as IPRoyal have
/// already folded their targeting/session parameters into them.
/// </summary>
/// <param name="Host">Gateway host name, for example "geo.iproyal.com".</param>
/// <param name="Port">Gateway port, for example 12321.</param>
/// <param name="Username">User name to authenticate against the gateway.</param>
/// <param name="Password">Password to authenticate with; may embed encoded session/geo parameters.</param>
/// <param name="Provider">Name of the provider that produced the lease.</param>
/// <param name="SessionId">Sticky session key when the exit IP is pinned; otherwise null.</param>
/// <param name="ExpiresAt">Point after which a sticky IP may be recycled; null when rotating or unbounded.</param>
public sealed record ProxyLease(
    string Host,
    int Port,
    string Username,
    string Password,
    string Provider,
    string? SessionId = null,
    DateTimeOffset? ExpiresAt = null)
{
    /// <summary>The "host:port" pair in the form browser proxy settings expect.</summary>
    public string Endpoint
    {
        get { return string.Concat(Host, ":", Port.ToString()); }
    }
}
|
||||
97
BlueLaminate/BlueLaminate.Scraper/Proxies/ProxyProbe.cs
Normal file
97
BlueLaminate/BlueLaminate.Scraper/Proxies/ProxyProbe.cs
Normal file
@@ -0,0 +1,97 @@
|
||||
using System.Text.Json;
|
||||
using BlueLaminate.Scraper.Browser;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using OpenQA.Selenium;
|
||||
|
||||
namespace BlueLaminate.Scraper.Proxies;
|
||||
|
||||
/// <summary>
/// Describes the exit IP that a proxy lease really resolves to, as reported
/// by ipinfo.io. Every field is optional because it is populated from the
/// echo service's JSON response.
/// </summary>
/// <param name="Ip">The exit IP address reported by the echo service.</param>
/// <param name="City">City reported for the exit IP.</param>
/// <param name="Region">Region reported for the exit IP.</param>
/// <param name="Country">Country reported for the exit IP.</param>
/// <param name="Org">
/// ASN plus organisation, for example "AS7922 Comcast Cable". This is the
/// signal that separates residential from datacenter exits: a consumer ISP
/// indicates a genuine residential IP, whereas a hosting company (OVH,
/// Hetzner, AWS…) indicates a datacenter IP presented as residential.
/// </param>
/// <param name="Hostname">Hostname reported for the exit IP, if any.</param>
/// <param name="Timezone">Time zone reported for the exit IP.</param>
public sealed record ProxyExitInfo(
    string? Ip,
    string? City,
    string? Region,
    string? Country,
    string? Org,
    string? Hostname,
    string? Timezone);
|
||||
|
||||
/// <summary>
/// Minimal end-to-end smoke test of the proxy plumbing. It acquires a lease,
/// starts the real browser through that lease and reads the exit IP back from
/// an IP-echo endpoint. The check costs only a few KB, which makes it the
/// sensible first thing to run on a metered residential plan: it confirms
/// that authentication works and reveals whether the IP is truly residential
/// before any bandwidth is spent on CSFloat.
/// </summary>
public sealed class ProxyProbe
{
    private const string IpEchoUrl = "https://ipinfo.io/json";

    // The echo service's property names are matched case-insensitively
    // against the ProxyExitInfo members.
    private static readonly JsonSerializerOptions JsonOptions = new()
    {
        PropertyNameCaseInsensitive = true,
    };

    private readonly IProxyProvider _provider;
    private readonly BrowserDriverFactory _factory;
    private readonly ILogger<ProxyProbe> _logger;

    /// <summary>Creates a probe from its collaborators.</summary>
    /// <param name="provider">Provider the lease is acquired from.</param>
    /// <param name="factory">Factory that starts a browser routed through a lease.</param>
    /// <param name="logger">Receives the lease and exit-IP log lines.</param>
    public ProxyProbe(
        IProxyProvider provider,
        BrowserDriverFactory factory,
        ILogger<ProxyProbe> logger)
    {
        (_provider, _factory, _logger) = (provider, factory, logger);
    }

    /// <summary>
    /// Acquires a lease for <paramref name="request"/>, loads the IP-echo URL
    /// in a browser created for that lease and returns the parsed result. The
    /// browser is always quit afterwards, whether or not the probe succeeds.
    /// </summary>
    /// <param name="request">The kind of exit IP to probe.</param>
    /// <returns>The exit information parsed from the echo response.</returns>
    /// <exception cref="InvalidOperationException">
    /// The page yielded no text, contained no JSON object, or deserialized to null.
    /// </exception>
    public async Task<ProxyExitInfo> RunAsync(ProxyRequest request)
    {
        var lease = _provider.Acquire(request);

        string exitMode;
        if (lease.SessionId is null)
        {
            exitMode = "rotating";
        }
        else
        {
            exitMode = $"sticky:{lease.SessionId}";
        }

        _logger.LogInformation(
            "Acquired {Provider} lease (exit {Mode}).",
            lease.Provider,
            exitMode);

        var browser = await _factory.CreateAsync(lease, blockImages: true);
        try
        {
            browser.Manage().Timeouts().PageLoad = TimeSpan.FromSeconds(60);
            browser.Navigate().GoToUrl(IpEchoUrl);

            // Ask for the document text instead of walking the DOM so that
            // the browser's built-in JSON viewer does not interfere; the JSON
            // object is then cut out of whatever text was rendered.
            var scriptRunner = (IJavaScriptExecutor)browser;
            if (scriptRunner.ExecuteScript("return document.documentElement.innerText;") is not string pageText)
            {
                throw new InvalidOperationException("Browser returned no page text.");
            }

            var json = ExtractJson(pageText);
            var exit = JsonSerializer.Deserialize<ProxyExitInfo>(json, JsonOptions);
            if (exit is null)
            {
                throw new InvalidOperationException("IP-echo response was empty.");
            }

            _logger.LogInformation(
                "Exit IP {Ip} — {City}, {Region}, {Country} — {Org}",
                exit.Ip,
                exit.City,
                exit.Region,
                exit.Country,
                exit.Org);

            return exit;
        }
        finally
        {
            browser.Quit();
        }
    }

    // Returns the substring running from the first '{' to the last '}' of
    // the rendered page text; throws when no such span exists.
    private static string ExtractJson(string text)
    {
        var open = text.IndexOf('{');
        var close = text.LastIndexOf('}');

        if (open >= 0 && close > open)
        {
            return text.Substring(open, close - open + 1);
        }

        throw new InvalidOperationException($"No JSON found in IP-echo response: {text}");
    }
}
|
||||
30
BlueLaminate/BlueLaminate.Scraper/Proxies/ProxyRequest.cs
Normal file
30
BlueLaminate/BlueLaminate.Scraper/Proxies/ProxyRequest.cs
Normal file
@@ -0,0 +1,30 @@
|
||||
namespace BlueLaminate.Scraper.Proxies;
|
||||
|
||||
/// <summary>
/// Describes the exit IP a caller would like, independent of any particular
/// provider: every <see cref="IProxyProvider"/> maps these settings onto its
/// own gateway syntax. A sticky request asks for a single residential IP to be
/// pinned for the lifetime of the session, while a non-sticky request allows
/// the IP to rotate on each connection.
/// </summary>
/// <param name="Country">
/// Optional ISO 3166-1 alpha-2 country code, or a comma-separated list from
/// which the provider chooses at random (for example "us" or "us,gb,de").
/// Null applies no geo constraint.
/// </param>
/// <param name="Sticky">
/// True to hold one exit IP for the entire session; false to rotate.
/// </param>
/// <param name="SessionId">
/// Optional session key chosen by the caller for a sticky lease. If it is null
/// while <paramref name="Sticky"/> is true, the provider generates a key.
/// </param>
/// <param name="Lifetime">
/// Duration a sticky IP should be kept before the provider is allowed to
/// recycle it. Has no effect when <paramref name="Sticky"/> is false. Null
/// defers to the provider's own default.
/// </param>
public sealed record ProxyRequest(
    string? Country = null,
    bool Sticky = true,
    string? SessionId = null,
    TimeSpan? Lifetime = null);
|
||||
Reference in New Issue
Block a user