remove selenium related code for now
This commit is contained in:
@@ -1,9 +1,7 @@
|
|||||||
using BlueLaminate.Cli;
|
using BlueLaminate.Cli;
|
||||||
using BlueLaminate.Cli.Logging;
|
using BlueLaminate.Cli.Logging;
|
||||||
using BlueLaminate.EFCore.Data;
|
using BlueLaminate.EFCore.Data;
|
||||||
using BlueLaminate.Scraper.Browser;
|
|
||||||
using BlueLaminate.Scraper.CsFloat;
|
using BlueLaminate.Scraper.CsFloat;
|
||||||
using BlueLaminate.Scraper.Proxies;
|
|
||||||
using BlueLaminate.Scraper.Skins;
|
using BlueLaminate.Scraper.Skins;
|
||||||
using Microsoft.Extensions.Logging;
|
using Microsoft.Extensions.Logging;
|
||||||
using OpenTelemetry;
|
using OpenTelemetry;
|
||||||
@@ -49,30 +47,6 @@ syncSkins.SetAction((parseResult, ct) =>
|
|||||||
loggerFactory,
|
loggerFactory,
|
||||||
ct));
|
ct));
|
||||||
|
|
||||||
// Exit-IP targeting switches for the proxy-backed commands.
Option<string?> countryOption = new("--country")
{
    Description = "Optional ISO country code(s) for the exit IP, e.g. \"us\" or \"us,gb\". Default: random.",
};
Option<bool> rotatingOption = new("--rotating")
{
    Description = "Use a rotating exit IP instead of a pinned (sticky) session.",
};

// `probe-proxy`: hands the parsed --country / --rotating values, the shared
// logger factory and the invocation's cancellation token to ProbeProxyAsync.
Command probeProxy = new(
    "probe-proxy",
    "Launch non-headless Edge through the IPRoyal residential proxy "
    + "and print the exit IP to confirm auth works and the IP is residential. "
    + "Reads IPROYAL_USERNAME / IPROYAL_PASSWORD.");
probeProxy.Add(countryOption);
probeProxy.Add(rotatingOption);
probeProxy.SetAction((parseResult, ct) =>
{
    var country = parseResult.GetValue(countryOption);
    var rotating = parseResult.GetValue(rotatingOption);
    return ProbeProxyAsync(country, rotating, loggerFactory, ct);
});
|
|
||||||
|
|
||||||
var defIndexOption = new Option<int?>("--def-index")
|
var defIndexOption = new Option<int?>("--def-index")
|
||||||
{
|
{
|
||||||
Description = "CSFloat weapon def_index (e.g. AK-47=7, M4A4=16)."
|
Description = "CSFloat weapon def_index (e.g. AK-47=7, M4A4=16)."
|
||||||
@@ -81,50 +55,6 @@ var paintIndexOption = new Option<int?>("--paint-index")
|
|||||||
{
|
{
|
||||||
Description = "CSFloat paint_index for a specific skin (e.g. M4A4 | Cyber Security=985)."
|
Description = "CSFloat paint_index for a specific skin (e.g. M4A4 | Cyber Security=985)."
|
||||||
};
|
};
|
||||||
// Options that only the capture-csfloat command consumes.
Option<string?> urlOption = new("--url")
{
    Description = "Full CSFloat URL to open. Overrides --def-index/--paint-index when set.",
};
Option<bool> loadImagesOption = new("--load-images")
{
    Description = "Load images (uses more bandwidth). Default off to conserve the metered plan.",
};
Option<bool> diagnoseOption = new("--diagnose")
{
    Description =
        "Log every CSFloat-domain response (url + status + type) "
        + "to reveal where a Steam-login wall appears, not just /api/ JSON.",
};
Option<string> outOption = new("--out")
{
    Description = "Directory to write captured JSON to.",
    // Falls back to a "captures" directory when --out is omitted.
    DefaultValueFactory = _ => "captures",
};

// `capture-csfloat`: forwards every parsed option, the shared logger factory
// and the invocation's cancellation token to CaptureCsfloatAsync.
Command captureCsfloat = new(
    "capture-csfloat",
    "Open a CSFloat search page through the residential proxy "
    + "and dump every CSFloat /api/ JSON response to disk while you browse "
    + "(open a listing → 'Latest Sales'). "
    + "Reads IPROYAL_USERNAME / IPROYAL_PASSWORD.");
captureCsfloat.Add(defIndexOption);
captureCsfloat.Add(paintIndexOption);
captureCsfloat.Add(urlOption);
captureCsfloat.Add(countryOption);
captureCsfloat.Add(loadImagesOption);
captureCsfloat.Add(diagnoseOption);
captureCsfloat.Add(outOption);
captureCsfloat.SetAction((parseResult, ct) =>
{
    var defIndex = parseResult.GetValue(defIndexOption);
    var paintIndex = parseResult.GetValue(paintIndexOption);
    var url = parseResult.GetValue(urlOption);
    var country = parseResult.GetValue(countryOption);
    var loadImages = parseResult.GetValue(loadImagesOption);
    var diagnose = parseResult.GetValue(diagnoseOption);
    // --out has a default value factory, hence the null-forgiving operator.
    var outDir = parseResult.GetValue(outOption)!;

    return CaptureCsfloatAsync(
        defIndex, paintIndex, url, country, loadImages, diagnose, outDir, loggerFactory, ct);
});
|
|
||||||
|
|
||||||
var sortByOption = new Option<string>("--sort-by")
|
var sortByOption = new Option<string>("--sort-by")
|
||||||
{
|
{
|
||||||
@@ -226,8 +156,6 @@ sweepCatalog.SetAction((parseResult, ct) =>
|
|||||||
var root = new RootCommand("BlueLaminate CLI — Counter-Strike skin tracker tools.")
|
var root = new RootCommand("BlueLaminate CLI — Counter-Strike skin tracker tools.")
|
||||||
{
|
{
|
||||||
syncSkins,
|
syncSkins,
|
||||||
probeProxy,
|
|
||||||
captureCsfloat,
|
|
||||||
fetchListings,
|
fetchListings,
|
||||||
sweepListings,
|
sweepListings,
|
||||||
sweepCatalog,
|
sweepCatalog,
|
||||||
@@ -235,111 +163,6 @@ var root = new RootCommand("BlueLaminate CLI — Counter-Strike skin tracker too
|
|||||||
|
|
||||||
return await root.Parse(args).InvokeAsync();
|
return await root.Parse(args).InvokeAsync();
|
||||||
|
|
||||||
// Proxy/Selenium spike. Builds an IPRoyal provider from the IPROYAL_USERNAME /
// IPROYAL_PASSWORD environment variables, runs a ProxyProbe through a browser
// created by BrowserDriverFactory, and prints the exit IP details it reports.
// Returns 0 when the probe succeeds; 1 when the credentials are missing or the
// probe throws. `ct` is accepted to fit the command-action signature but is not
// consulted by this method.
static async Task<int> ProbeProxyAsync(
    string? country, bool rotating, ILoggerFactory loggerFactory, CancellationToken ct)
{
    var proxyUser = Environment.GetEnvironmentVariable("IPROYAL_USERNAME");
    var proxyPass = Environment.GetEnvironmentVariable("IPROYAL_PASSWORD");
    if (string.IsNullOrWhiteSpace(proxyUser) || string.IsNullOrWhiteSpace(proxyPass))
    {
        Console.Error.WriteLine(
            "Set IPROYAL_USERNAME and IPROYAL_PASSWORD environment variables first.");
        return 1;
    }

    var proxyProvider = new IpRoyalProxyProvider(proxyUser, proxyPass);
    var driverFactory = new BrowserDriverFactory(loggerFactory.CreateLogger<BrowserDriverFactory>());
    var proxyProbe = new ProxyProbe(
        proxyProvider, driverFactory, loggerFactory.CreateLogger<ProxyProbe>());

    try
    {
        // A sticky (pinned) session is requested unless --rotating was passed.
        var exit = await proxyProbe.RunAsync(new ProxyRequest(Country: country, Sticky: !rotating));

        string[] report =
        {
            string.Empty,
            $"  Exit IP : {exit.Ip}",
            $"  Location: {exit.City}, {exit.Region}, {exit.Country}",
            $"  Org/ASN : {exit.Org}",
            $"  Hostname: {exit.Hostname ?? "—"}",
            string.Empty,
            "Check Org/ASN: a consumer ISP = residential; a hosting provider = datacenter.",
        };
        foreach (var line in report)
        {
            Console.WriteLine(line);
        }

        return 0;
    }
    catch (Exception ex)
    {
        // Any failure is reduced to a one-line message and a non-zero exit code.
        Console.Error.WriteLine($"Proxy probe failed: {ex.Message}");
        return 1;
    }
}
|
|
||||||
|
|
||||||
// Phase B discovery. Resolves the target CSFloat URL, then runs
// CsFloatCaptureService through a sticky IPRoyal lease until the operator
// presses Enter, and finally reports how many responses were captured and where.
// Credentials come from IPROYAL_USERNAME / IPROYAL_PASSWORD.
// Returns 0 on success; 1 when the credentials are missing or the capture throws.
static async Task<int> CaptureCsfloatAsync(
    int? defIndex, int? paintIndex, string? url, string? country,
    bool loadImages, bool diagnose, string outDir, ILoggerFactory loggerFactory, CancellationToken ct)
{
    var proxyUser = Environment.GetEnvironmentVariable("IPROYAL_USERNAME");
    var proxyPass = Environment.GetEnvironmentVariable("IPROYAL_PASSWORD");
    if (string.IsNullOrWhiteSpace(proxyUser) || string.IsNullOrWhiteSpace(proxyPass))
    {
        Console.Error.WriteLine(
            "Set IPROYAL_USERNAME and IPROYAL_PASSWORD environment variables first.");
        return 1;
    }

    var startUrl = BuildCsfloatUrl(url, defIndex, paintIndex);
    var proxyProvider = new IpRoyalProxyProvider(proxyUser, proxyPass);
    var driverFactory = new BrowserDriverFactory(loggerFactory.CreateLogger<BrowserDriverFactory>());
    var captureService = new CsFloatCaptureService(
        proxyProvider, driverFactory, loggerFactory.CreateLogger<CsFloatCaptureService>());

    Console.WriteLine($"Opening {startUrl}");
    Console.WriteLine(
        "When the page loads: click a listing, then the 'Latest Sales' tab. Capturing all CSFloat /api/ responses.");
    Console.WriteLine("Press Enter here when you're done to close the browser.");

    // Console.ReadLine is synchronous, so it is pushed onto a thread-pool task;
    // the capture keeps running until that task completes.
    Task WaitForEnterAsync() => Task.Run(() => Console.ReadLine(), ct);

    try
    {
        var written = await captureService.RunAsync(
            startUrl,
            outDir,
            new ProxyRequest(Country: country, Sticky: true),
            loadImages,
            diagnose,
            WaitForEnterAsync);

        var absoluteOutDir = Path.GetFullPath(outDir);
        Console.WriteLine();
        Console.WriteLine($"Captured {written} response(s) to {absoluteOutDir}");
        return 0;
    }
    catch (Exception ex)
    {
        // Any failure is reduced to a one-line message and a non-zero exit code.
        Console.Error.WriteLine($"CSFloat capture failed: {ex.Message}");
        return 1;
    }
}
|
|
||||||
|
|
||||||
// Resolves the page to open. A non-blank --url is returned untouched; otherwise
// a CSFloat search URL is assembled from the two indexes, with def_index 16 and
// paint_index 985 (the M4A4 | Cyber Security example) standing in for any index
// that was not supplied.
static string BuildCsfloatUrl(string? url, int? defIndex, int? paintIndex)
{
    const int FallbackDefIndex = 16;
    const int FallbackPaintIndex = 985;

    return string.IsNullOrWhiteSpace(url)
        ? $"https://csfloat.com/search?def_index={defIndex.GetValueOrDefault(FallbackDefIndex)}&paint_index={paintIndex.GetValueOrDefault(FallbackPaintIndex)}"
        : url;
}
|
|
||||||
|
|
||||||
// Fetch active listings for one skin via CSFloat's official API and print them.
|
// Fetch active listings for one skin via CSFloat's official API and print them.
|
||||||
// Fetch-and-print only — no DB — so we can verify the real field shapes against a
|
// Fetch-and-print only — no DB — so we can verify the real field shapes against a
|
||||||
// live key before designing the Listing schema. Defaults to the M4A4 | Cyber
|
// live key before designing the Listing schema. Defaults to the M4A4 | Cyber
|
||||||
|
|||||||
@@ -8,7 +8,6 @@
|
|||||||
|
|
||||||
<ItemGroup>
|
<ItemGroup>
|
||||||
<PackageReference Include="Microsoft.Extensions.Logging.Abstractions" Version="10.0.8" />
|
<PackageReference Include="Microsoft.Extensions.Logging.Abstractions" Version="10.0.8" />
|
||||||
<PackageReference Include="Selenium.WebDriver" Version="4.44.0" />
|
|
||||||
</ItemGroup>
|
</ItemGroup>
|
||||||
|
|
||||||
</Project>
|
</Project>
|
||||||
|
|||||||
@@ -1,88 +0,0 @@
|
|||||||
using BlueLaminate.Scraper.Proxies;
|
|
||||||
using Microsoft.Extensions.Logging;
|
|
||||||
using OpenQA.Selenium;
|
|
||||||
using OpenQA.Selenium.Edge;
|
|
||||||
|
|
||||||
namespace BlueLaminate.Scraper.Browser;
|
|
||||||
|
|
||||||
/// <summary>
/// Builds a non-headless Edge (Chromium) WebDriver routed through a
/// <see cref="ProxyLease"/>. Two things make this non-trivial:
/// <list type="bullet">
/// <item>Proxy authentication. Chromium can't auto-fill the gateway's auth
/// dialog under automation, and the classic extension trick relies on
/// Manifest V2 which current Chromium disables. Instead we answer the proxy's
/// 407 challenge through the DevTools (CDP) auth handler, which works
/// non-headless and needs no extension.</item>
/// <item>Bandwidth. The residential plan is metered per GB, so images are
/// disabled at the content-settings level. Cloudflare gates on JS execution and
/// TLS/behaviour, not whether pictures render, so this stays realistic.</item>
/// </list>
/// Each driver gets a throwaway user-data dir so runs never share cookies and
/// never touch the user's real Edge profile. If the launch fails, that
/// directory is removed again; after a successful launch it is left in place
/// because the running browser is using it.
/// </summary>
public sealed class BrowserDriverFactory
{
    private readonly ILogger<BrowserDriverFactory> _logger;

    /// <summary>Creates a factory that reports launches through <paramref name="logger"/>.</summary>
    /// <exception cref="ArgumentNullException"><paramref name="logger"/> is null.</exception>
    public BrowserDriverFactory(ILogger<BrowserDriverFactory> logger)
    {
        _logger = logger ?? throw new ArgumentNullException(nameof(logger));
    }

    /// <summary>
    /// Launches Edge through <paramref name="lease"/> and registers a CDP
    /// authentication handler that answers the gateway with the lease credentials.
    /// </summary>
    /// <param name="lease">Proxy endpoint and credentials to route the browser through.</param>
    /// <param name="blockImages">
    /// When true (the default), images are disabled through the profile
    /// content-settings preference.
    /// </param>
    /// <returns>The started driver; the caller is responsible for quitting it.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="lease"/> is null.</exception>
    public async Task<IWebDriver> CreateAsync(ProxyLease lease, bool blockImages = true)
    {
        ArgumentNullException.ThrowIfNull(lease);

        var options = new EdgeOptions();

        // Route browser traffic through the gateway via the launch argument
        // rather than EdgeOptions.Proxy. Setting Proxy makes Selenium hand the
        // gateway to Selenium Manager for the driver *download* too, which fails
        // because that step can't authenticate. The arg scopes the proxy to the
        // browser only; credentials are answered below via CDP. No scheme = all
        // protocols use the gateway.
        options.AddArgument($"--proxy-server={lease.Endpoint}");

        // Reduce the most obvious automation tells; residential exit + a real
        // (non-headless) browser do the rest.
        options.AddArgument("--disable-blink-features=AutomationControlled");
        options.AddExcludedArgument("enable-automation");
        options.AddArgument("--no-first-run");
        options.AddArgument("--no-default-browser-check");
        options.AddArgument("--start-maximized");

        // Isolated, disposable profile per launch.
        var profileDir = Path.Combine(Path.GetTempPath(), "bluelaminate-edge", Guid.NewGuid().ToString("N"));
        Directory.CreateDirectory(profileDir);

        EdgeDriver? driver = null;
        try
        {
            options.AddArgument($"--user-data-dir={profileDir}");

            if (blockImages)
            {
                options.AddUserProfilePreference("profile.managed_default_content_settings.images", 2);
            }

            _logger.LogInformation(
                "Launching Edge via proxy {Endpoint} (provider {Provider}, session {Session}).",
                lease.Endpoint, lease.Provider, lease.SessionId ?? "rotating");

            driver = new EdgeDriver(options);

            // Answer the gateway's proxy-auth (407) challenge with the lease
            // credentials. UriMatcher returns true so it applies to every
            // request, since the challenge originates from the proxy itself.
            var network = driver.Manage().Network;
            network.AddAuthenticationHandler(new NetworkAuthenticationHandler
            {
                UriMatcher = _ => true,
                Credentials = new PasswordCredentials(lease.Username, lease.Password),
            });
            await network.StartMonitoring();

            return driver;
        }
        catch
        {
            // Nothing is handed back on this path, so tear down whatever was
            // started. Both steps are best-effort so the original failure is
            // the one that reaches the caller.
            QuitQuietly(driver);
            DeleteProfileQuietly(profileDir);
            throw;
        }
    }

    // Quits a partially initialised driver without letting a teardown error
    // replace the exception that is already propagating.
    private void QuitQuietly(IWebDriver? driver)
    {
        if (driver is null)
        {
            return;
        }

        try
        {
            driver.Quit();
        }
        catch (Exception ex)
        {
            _logger.LogDebug(ex, "Ignoring error while quitting a failed Edge launch.");
        }
    }

    // Removes the throwaway profile directory of a failed launch. The browser
    // may still hold files briefly, so I/O and access errors are only logged.
    private void DeleteProfileQuietly(string profileDir)
    {
        try
        {
            if (Directory.Exists(profileDir))
            {
                Directory.Delete(profileDir, recursive: true);
            }
        }
        catch (Exception ex) when (ex is IOException or UnauthorizedAccessException)
        {
            _logger.LogDebug(ex, "Could not remove temporary Edge profile {ProfileDir}.", profileDir);
        }
    }
}
|
|
||||||
@@ -1,139 +0,0 @@
|
|||||||
using System.Text;
|
|
||||||
using BlueLaminate.Scraper.Browser;
|
|
||||||
using BlueLaminate.Scraper.Proxies;
|
|
||||||
using Microsoft.Extensions.Logging;
|
|
||||||
using OpenQA.Selenium;
|
|
||||||
|
|
||||||
namespace BlueLaminate.Scraper.CsFloat;
|
|
||||||
|
|
||||||
/// <summary>
/// Phase-B discovery tool. Drives a real Edge browser through a residential
/// lease to a CSFloat search page, then records every CSFloat <c>/api/</c> JSON
/// response to disk while a human clicks around (open a listing → "Latest
/// Sales"). We don't yet know CSFloat's exact endpoints or DOM selectors, so a
/// human-in-the-loop is the cheapest way to surface the real traffic: the tool
/// just listens and dumps, the operator drives the UI in the visible window.
/// Once we can see the captured shapes we can automate navigation and design the
/// tables.
/// </summary>
public sealed class CsFloatCaptureService
{
    // Longest file stem Sanitize will produce.
    private const int MaxFileStemLength = 120;

    // Encoding.UTF8 makes File.WriteAllText prepend a byte-order mark, which
    // strict JSON parsers reject; captures are written without one.
    private static readonly UTF8Encoding Utf8NoBom = new(encoderShouldEmitUTF8Identifier: false);

    private readonly IProxyProvider _provider;
    private readonly BrowserDriverFactory _factory;
    private readonly ILogger<CsFloatCaptureService> _logger;

    public CsFloatCaptureService(
        IProxyProvider provider,
        BrowserDriverFactory factory,
        ILogger<CsFloatCaptureService> logger)
    {
        _provider = provider;
        _factory = factory;
        _logger = logger;
    }

    /// <summary>
    /// Opens <paramref name="url"/> through the proxy and captures CSFloat API
    /// responses to <paramref name="outputDir"/> until <paramref name="browseUntilDone"/>
    /// completes (the CLI ties that to the operator pressing Enter). When
    /// <paramref name="diagnose"/> is true, every response whose URL contains
    /// "csfloat" is logged (url + status + type) to reveal where a login wall
    /// appears.
    /// </summary>
    /// <param name="url">Page to navigate to once the browser is up.</param>
    /// <param name="outputDir">Directory for the capture files; created if missing.</param>
    /// <param name="request">Proxy lease parameters passed to the provider.</param>
    /// <param name="loadImages">When false, the browser is created with images blocked.</param>
    /// <param name="diagnose">Enables the per-response diagnostic log line.</param>
    /// <param name="browseUntilDone">Awaited after navigation; capture stops when it completes.</param>
    /// <returns>
    /// The number of responses actually written to disk. Responses whose write
    /// failed are logged and not counted.
    /// </returns>
    public async Task<int> RunAsync(
        string url,
        string outputDir,
        ProxyRequest request,
        bool loadImages,
        bool diagnose,
        Func<Task> browseUntilDone)
    {
        Directory.CreateDirectory(outputDir);

        var lease = _provider.Acquire(request);
        var driver = await _factory.CreateAsync(lease, blockImages: !loadImages);

        // `sequence` numbers the files (gaps are possible when a write fails);
        // `written` counts successful writes and is what gets returned. Both
        // are updated with Interlocked operations inside the event handler.
        var sequence = 0;
        var written = 0;

        void OnResponse(object? sender, NetworkResponseReceivedEventArgs e)
        {
            var responseUrl = e.ResponseUrl;
            if (string.IsNullOrEmpty(responseUrl)
                || !responseUrl.Contains("csfloat", StringComparison.OrdinalIgnoreCase))
            {
                return;
            }

            // Diagnose mode logs every CSFloat-domain response — including the
            // SPA shell, redirects and any 401/403 — so we can see exactly where
            // a Steam-login wall appears even before any /api/ call fires.
            if (diagnose)
            {
                _logger.LogInformation("[{Status}] {Type} {Url}",
                    e.ResponseStatusCode, e.ResponseResourceType, responseUrl);
            }

            // Only JSON API calls get written to disk; skip the shell, images,
            // fonts, analytics, etc. Matches both api.csfloat.com and csfloat.com/api.
            if (!responseUrl.Contains("/api/", StringComparison.OrdinalIgnoreCase))
            {
                return;
            }

            var body = e.ResponseBody;
            if (string.IsNullOrWhiteSpace(body))
            {
                // Body wasn't buffered (e.g. the known Fetch interception race).
                // Log the endpoint so we still learn it exists even if empty.
                _logger.LogWarning("No body captured for {Url} (status {Status}).",
                    responseUrl, e.ResponseStatusCode);
                return;
            }

            var n = Interlocked.Increment(ref sequence);
            var fileName = $"{n:D3}_{Sanitize(responseUrl)}.json";

            try
            {
                File.WriteAllText(Path.Combine(outputDir, fileName), body, Utf8NoBom);

                // Count only after the write succeeded so the return value
                // matches the files on disk.
                Interlocked.Increment(ref written);

                _logger.LogInformation(
                    "Captured #{N} [{Status}] {Url} → {File} ({Bytes} bytes).",
                    n, e.ResponseStatusCode, responseUrl, fileName, Utf8NoBom.GetByteCount(body));
            }
            catch (Exception ex)
            {
                _logger.LogWarning(ex, "Failed to write capture for {Url}.", responseUrl);
            }
        }

        try
        {
            // Everything after driver creation runs under the outer finally so
            // the browser is quit even if the subscription itself throws.
            var network = driver.Manage().Network;
            network.NetworkResponseReceived += OnResponse;
            try
            {
                _logger.LogInformation("Navigating to {Url}", url);
                driver.Navigate().GoToUrl(url);
                await browseUntilDone();
            }
            finally
            {
                network.NetworkResponseReceived -= OnResponse;
            }
        }
        finally
        {
            driver.Quit();
        }

        return Volatile.Read(ref written);
    }

    // Turn a URL into a filesystem-safe, readable, length-capped file stem so the
    // captures are self-describing (the endpoint is visible in the filename).
    // Letters, digits, '-' and '.' are kept; every other character becomes '_'.
    private static string Sanitize(string url)
    {
        var trimmed = url
            .Replace("https://", "", StringComparison.OrdinalIgnoreCase)
            .Replace("http://", "", StringComparison.OrdinalIgnoreCase);

        var sb = new StringBuilder(trimmed.Length);
        foreach (var c in trimmed)
        {
            sb.Append(char.IsLetterOrDigit(c) || c is '-' or '.' ? c : '_');
        }

        var stem = sb.ToString();
        return stem.Length > MaxFileStemLength ? stem[..MaxFileStemLength] : stem;
    }
}
|
|
||||||
@@ -2,11 +2,11 @@ namespace BlueLaminate.Scraper.Proxies;
|
|||||||
|
|
||||||
/// <summary>
|
/// <summary>
|
||||||
/// A concrete, ready-to-use proxy endpoint handed back by an
|
/// A concrete, ready-to-use proxy endpoint handed back by an
|
||||||
/// <see cref="IProxyProvider"/>. This is the only proxy type the browser layer
|
/// <see cref="IProxyProvider"/>. This is the only proxy type a consumer ever
|
||||||
/// ever sees, so swapping providers (or mixing several in a grab-bag) never
|
/// sees, so swapping providers (or mixing several in a grab-bag) never touches
|
||||||
/// touches the Selenium code. <see cref="Username"/> and <see cref="Password"/>
|
/// the calling code. <see cref="Username"/> and <see cref="Password"/> are the
|
||||||
/// are the literal credentials to present to the gateway — for providers like
|
/// literal credentials to present to the gateway — for providers like IPRoyal
|
||||||
/// IPRoyal the targeting/session parameters are already baked into them.
|
/// the targeting/session parameters are already baked into them.
|
||||||
/// </summary>
|
/// </summary>
|
||||||
/// <param name="Host">Gateway host, e.g. "geo.iproyal.com".</param>
|
/// <param name="Host">Gateway host, e.g. "geo.iproyal.com".</param>
|
||||||
/// <param name="Port">Gateway port, e.g. 12321.</param>
|
/// <param name="Port">Gateway port, e.g. 12321.</param>
|
||||||
|
|||||||
@@ -1,97 +0,0 @@
|
|||||||
using System.Text.Json;
|
|
||||||
using BlueLaminate.Scraper.Browser;
|
|
||||||
using Microsoft.Extensions.Logging;
|
|
||||||
using OpenQA.Selenium;
|
|
||||||
|
|
||||||
namespace BlueLaminate.Scraper.Proxies;
|
|
||||||
|
|
||||||
/// <summary>The exit IP a proxy lease actually resolves to, per ipinfo.io.</summary>
/// <remarks>
/// <see cref="ProxyProbe"/> deserializes the IP-echo JSON into this record with
/// case-insensitive property names, so each member carries the response property
/// of the same name and is null when that property is absent.
/// </remarks>
/// <param name="Ip">Value of the response's <c>ip</c> property: the exit IP address.</param>
/// <param name="City">Value of the response's <c>city</c> property.</param>
/// <param name="Region">Value of the response's <c>region</c> property.</param>
/// <param name="Country">Value of the response's <c>country</c> property.</param>
/// <param name="Org">
/// ASN + organisation, e.g. "AS7922 Comcast Cable". This is the tell for
/// residential vs. datacenter: a consumer ISP here means a real residential
/// exit; a hosting provider (OVH, Hetzner, AWS…) means datacenter dressed up.
/// </param>
/// <param name="Hostname">Value of the response's <c>hostname</c> property.</param>
/// <param name="Timezone">Value of the response's <c>timezone</c> property.</param>
public sealed record ProxyExitInfo(
    string? Ip,
    string? City,
    string? Region,
    string? Country,
    string? Org,
    string? Hostname,
    string? Timezone);
|
|
||||||
|
|
||||||
/// <summary>
/// End-to-end smoke test for the proxy plumbing. It acquires a lease, starts a
/// browser through it, loads an IP-echo endpoint and parses what the page shows
/// into a <see cref="ProxyExitInfo"/>. The request is only a few KB, which makes
/// it the sensible first thing to run against a metered residential plan: it
/// proves authentication works and shows whether the exit really is residential
/// before bandwidth is spent on CSFloat.
/// </summary>
public sealed class ProxyProbe
{
    private const string IpEchoUrl = "https://ipinfo.io/json";

    private static readonly JsonSerializerOptions JsonOptions = new()
    {
        PropertyNameCaseInsensitive = true,
    };

    private readonly IProxyProvider _provider;
    private readonly BrowserDriverFactory _factory;
    private readonly ILogger<ProxyProbe> _logger;

    public ProxyProbe(
        IProxyProvider provider,
        BrowserDriverFactory factory,
        ILogger<ProxyProbe> logger)
    {
        _provider = provider;
        _factory = factory;
        _logger = logger;
    }

    /// <summary>
    /// Acquires a lease for <paramref name="request"/>, opens the IP-echo URL in
    /// a browser routed through it, and returns the parsed exit details. The
    /// browser is always quit before this method completes.
    /// </summary>
    /// <exception cref="InvalidOperationException">
    /// The page yielded no text, no JSON object, or an empty JSON payload.
    /// </exception>
    public async Task<ProxyExitInfo> RunAsync(ProxyRequest request)
    {
        var lease = _provider.Acquire(request);

        string exitMode;
        if (lease.SessionId is null)
        {
            exitMode = "rotating";
        }
        else
        {
            exitMode = $"sticky:{lease.SessionId}";
        }

        _logger.LogInformation("Acquired {Provider} lease (exit {Mode}).", lease.Provider, exitMode);

        var browser = await _factory.CreateAsync(lease, blockImages: true);
        try
        {
            browser.Manage().Timeouts().PageLoad = TimeSpan.FromSeconds(60);
            browser.Navigate().GoToUrl(IpEchoUrl);

            // Ask for the document text instead of walking the DOM: the
            // browser's built-in JSON viewer wraps the payload, and the raw
            // object is cut back out of the rendered text afterwards.
            var scriptResult = ((IJavaScriptExecutor)browser)
                .ExecuteScript("return document.documentElement.innerText;");
            if (scriptResult is not string pageText)
            {
                throw new InvalidOperationException("Browser returned no page text.");
            }

            var exit = JsonSerializer.Deserialize<ProxyExitInfo>(ExtractJson(pageText), JsonOptions);
            if (exit is null)
            {
                throw new InvalidOperationException("IP-echo response was empty.");
            }

            _logger.LogInformation(
                "Exit IP {Ip} — {City}, {Region}, {Country} — {Org}",
                exit.Ip, exit.City, exit.Region, exit.Country, exit.Org);

            return exit;
        }
        finally
        {
            browser.Quit();
        }
    }

    // Returns the span from the first '{' to the last '}' of the rendered text,
    // or throws when no such span exists.
    private static string ExtractJson(string text)
    {
        var open = text.IndexOf('{');
        var close = text.LastIndexOf('}');

        if (open >= 0 && close > open)
        {
            return text.Substring(open, close - open + 1);
        }

        throw new InvalidOperationException($"No JSON found in IP-echo response: {text}");
    }
}
|
|
||||||
Reference in New Issue
Block a user