remove selenium related code for now

This commit is contained in:
bob
2026-05-29 22:17:11 -05:00
parent d1752b1b07
commit eb5fb0dac7
6 changed files with 5 additions and 507 deletions

View File

@@ -8,7 +8,6 @@
<ItemGroup>
<PackageReference Include="Microsoft.Extensions.Logging.Abstractions" Version="10.0.8" />
<PackageReference Include="Selenium.WebDriver" Version="4.44.0" />
</ItemGroup>
</Project>

View File

@@ -1,88 +0,0 @@
using BlueLaminate.Scraper.Proxies;
using Microsoft.Extensions.Logging;
using OpenQA.Selenium;
using OpenQA.Selenium.Edge;
namespace BlueLaminate.Scraper.Browser;
/// <summary>
/// Builds a non-headless Edge (Chromium) WebDriver routed through a
/// <see cref="ProxyLease"/>. Two things make this non-trivial:
/// <list type="bullet">
/// <item>Proxy authentication. Chromium can't auto-fill the gateway's auth
/// dialog under automation, and the classic extension trick relies on
/// Manifest V2 which current Chromium disables. Instead we answer the proxy's
/// 407 challenge through the DevTools (CDP) auth handler, which works
/// non-headless and needs no extension.</item>
/// <item>Bandwidth. The residential plan is metered per GB, so images are
/// disabled at the content-settings level. Cloudflare gates on JS execution and
/// TLS/behaviour, not whether pictures render, so this stays realistic.</item>
/// </list>
/// Each driver gets a throwaway user-data dir so runs never share cookies and
/// never touch the user's real Edge profile. If the launch fails, that
/// directory is removed again (best effort); after a successful launch it is
/// left in place and this class does not delete it.
/// </summary>
public sealed class BrowserDriverFactory
{
    private readonly ILogger<BrowserDriverFactory> _logger;

    /// <summary>Creates the factory.</summary>
    /// <param name="logger">Receives launch and cleanup diagnostics.</param>
    public BrowserDriverFactory(ILogger<BrowserDriverFactory> logger)
    {
        _logger = logger;
    }

    /// <summary>
    /// Launches Edge through <paramref name="lease"/> and registers a network
    /// authentication handler that supplies the lease credentials.
    /// </summary>
    /// <param name="lease">Proxy endpoint and credentials to route the browser through.</param>
    /// <param name="blockImages">
    /// When true (the default), sets the image content setting to 2 so
    /// pictures are not loaded.
    /// </param>
    /// <returns>A started driver; the caller is responsible for calling <c>Quit()</c>.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="lease"/> is null.</exception>
    public async Task<IWebDriver> CreateAsync(ProxyLease lease, bool blockImages = true)
    {
        ArgumentNullException.ThrowIfNull(lease);

        var options = new EdgeOptions();

        // Route browser traffic through the gateway via the launch argument
        // rather than EdgeOptions.Proxy. Setting Proxy makes Selenium hand the
        // gateway to Selenium Manager for the driver *download* too, which fails
        // because that step can't authenticate. The arg scopes the proxy to the
        // browser only; credentials are answered below via CDP. No scheme = all
        // protocols use the gateway.
        options.AddArgument($"--proxy-server={lease.Endpoint}");

        // Reduce the most obvious automation tells; residential exit + a real
        // (non-headless) browser do the rest.
        options.AddArgument("--disable-blink-features=AutomationControlled");
        options.AddExcludedArgument("enable-automation");
        options.AddArgument("--no-first-run");
        options.AddArgument("--no-default-browser-check");
        options.AddArgument("--start-maximized");

        // Isolated, disposable profile per launch.
        var profileDir = Path.Combine(Path.GetTempPath(), "bluelaminate-edge", Guid.NewGuid().ToString("N"));
        Directory.CreateDirectory(profileDir);
        options.AddArgument($"--user-data-dir={profileDir}");

        if (blockImages)
        {
            options.AddUserProfilePreference("profile.managed_default_content_settings.images", 2);
        }

        _logger.LogInformation(
            "Launching Edge via proxy {Endpoint} (provider {Provider}, session {Session}).",
            lease.Endpoint, lease.Provider, lease.SessionId ?? "rotating");

        EdgeDriver? driver = null;
        try
        {
            // Constructed inside the try so a failed launch still cleans up
            // the profile directory created above.
            driver = new EdgeDriver(options);

            // Answer the gateway's proxy-auth (407) challenge with the lease
            // credentials. UriMatcher returns true so it applies to every
            // request, since the challenge originates from the proxy itself.
            var network = driver.Manage().Network;
            network.AddAuthenticationHandler(new NetworkAuthenticationHandler
            {
                UriMatcher = _ => true,
                Credentials = new PasswordCredentials(lease.Username, lease.Password),
            });
            await network.StartMonitoring();

            return driver;
        }
        catch
        {
            // Both helpers swallow (and log) their own failures so the
            // original launch error is the one that propagates.
            QuitQuietly(driver);
            TryDeleteProfile(profileDir);
            throw;
        }
    }

    // Shuts the browser down after a failed launch without letting a failure
    // in Quit() replace the exception that is already in flight.
    private void QuitQuietly(IWebDriver? driver)
    {
        if (driver is null)
        {
            return;
        }

        try
        {
            driver.Quit();
        }
        catch (Exception ex)
        {
            _logger.LogWarning(ex, "Failed to shut down Edge after a launch error.");
        }
    }

    // Best-effort removal of the throwaway profile directory; files may still
    // be locked by a browser process that has not fully exited.
    private void TryDeleteProfile(string profileDir)
    {
        try
        {
            if (Directory.Exists(profileDir))
            {
                Directory.Delete(profileDir, recursive: true);
            }
        }
        catch (Exception ex) when (ex is IOException or UnauthorizedAccessException)
        {
            _logger.LogWarning(ex, "Could not remove temporary Edge profile {ProfileDir}.", profileDir);
        }
    }
}

View File

@@ -1,139 +0,0 @@
using System.Text;
using BlueLaminate.Scraper.Browser;
using BlueLaminate.Scraper.Proxies;
using Microsoft.Extensions.Logging;
using OpenQA.Selenium;
namespace BlueLaminate.Scraper.CsFloat;
/// <summary>
/// Phase-B discovery tool. Drives a real Edge browser through a residential
/// lease to a CSFloat search page, then records every CSFloat <c>/api/</c> JSON
/// response to disk while a human clicks around (open a listing → "Latest
/// Sales"). We don't yet know CSFloat's exact endpoints or DOM selectors, so a
/// human-in-the-loop is the cheapest way to surface the real traffic: the tool
/// just listens and dumps, the operator drives the UI in the visible window.
/// Once we can see the captured shapes we can automate navigation and design the
/// tables.
/// </summary>
public sealed class CsFloatCaptureService
{
    private readonly IProxyProvider _provider;
    private readonly BrowserDriverFactory _factory;
    private readonly ILogger<CsFloatCaptureService> _logger;

    /// <summary>Creates the capture service.</summary>
    /// <param name="provider">Source of the proxy lease used for the session.</param>
    /// <param name="factory">Launches the browser through that lease.</param>
    /// <param name="logger">Receives capture and diagnostic output.</param>
    public CsFloatCaptureService(
        IProxyProvider provider,
        BrowserDriverFactory factory,
        ILogger<CsFloatCaptureService> logger)
    {
        _provider = provider;
        _factory = factory;
        _logger = logger;
    }

    /// <summary>
    /// Opens <paramref name="url"/> through the proxy and captures CSFloat API
    /// responses to <paramref name="outputDir"/> until <paramref name="browseUntilDone"/>
    /// completes (the CLI ties that to the operator pressing Enter). When
    /// <paramref name="diagnose"/> is true, every response whose URL contains
    /// "csfloat" is logged (url + status + type) to reveal where a login wall
    /// appears.
    /// </summary>
    /// <param name="url">Page to navigate to once the browser is up.</param>
    /// <param name="outputDir">Directory for the captured JSON files; created if missing.</param>
    /// <param name="request">Passed to the proxy provider to acquire the lease.</param>
    /// <param name="loadImages">When false, the browser is launched with images blocked.</param>
    /// <param name="diagnose">Enables per-response logging as described above.</param>
    /// <param name="browseUntilDone">Awaited after navigation; capture stops when it completes.</param>
    /// <returns>
    /// The number of responses actually written to disk. Responses whose file
    /// write failed are logged but not counted.
    /// </returns>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="url"/>, <paramref name="outputDir"/> or
    /// <paramref name="browseUntilDone"/> is null.
    /// </exception>
    public async Task<int> RunAsync(
        string url,
        string outputDir,
        ProxyRequest request,
        bool loadImages,
        bool diagnose,
        Func<Task> browseUntilDone)
    {
        // Validate before acquiring a lease or launching a browser, so a bad
        // argument does not cost a launch and a page load over the proxy.
        ArgumentNullException.ThrowIfNull(url);
        ArgumentNullException.ThrowIfNull(outputDir);
        ArgumentNullException.ThrowIfNull(browseUntilDone);

        Directory.CreateDirectory(outputDir);
        var lease = _provider.Acquire(request);
        var driver = await _factory.CreateAsync(lease, blockImages: !loadImages);

        // sequence numbers the files (and may have gaps when a write fails);
        // written is what the method reports back. Both are updated with
        // Interlocked because the handler is raised by the driver, not by
        // this method's own flow.
        var sequence = 0;
        var written = 0;

        void OnResponse(object? sender, NetworkResponseReceivedEventArgs e)
        {
            var responseUrl = e.ResponseUrl;
            if (string.IsNullOrEmpty(responseUrl)
                || !responseUrl.Contains("csfloat", StringComparison.OrdinalIgnoreCase))
            {
                return;
            }

            // Diagnose mode logs every CSFloat-domain response — including the
            // SPA shell, redirects and any 401/403 — so we can see exactly where
            // a Steam-login wall appears even before any /api/ call fires.
            if (diagnose)
            {
                _logger.LogInformation("[{Status}] {Type} {Url}",
                    e.ResponseStatusCode, e.ResponseResourceType, responseUrl);
            }

            // Only JSON API calls get written to disk; skip the shell, images,
            // fonts, analytics, etc. Matches both api.csfloat.com and csfloat.com/api.
            if (!responseUrl.Contains("/api/", StringComparison.OrdinalIgnoreCase))
            {
                return;
            }

            var body = e.ResponseBody;
            if (string.IsNullOrWhiteSpace(body))
            {
                // Body wasn't buffered (e.g. the known Fetch interception race).
                // Log the endpoint so we still learn it exists even if empty.
                _logger.LogWarning("No body captured for {Url} (status {Status}).",
                    responseUrl, e.ResponseStatusCode);
                return;
            }

            try
            {
                var n = Interlocked.Increment(ref sequence);
                var fileName = $"{n:D3}_{Sanitize(responseUrl)}.json";
                File.WriteAllText(Path.Combine(outputDir, fileName), body, Encoding.UTF8);

                // Count only after the write succeeded so the return value
                // matches what is on disk.
                Interlocked.Increment(ref written);

                // NOTE: body.Length is the UTF-16 character count, which the
                // message labels as bytes.
                _logger.LogInformation(
                    "Captured #{N} [{Status}] {Url} → {File} ({Bytes} bytes).",
                    n, e.ResponseStatusCode, responseUrl, fileName, body.Length);
            }
            catch (Exception ex)
            {
                _logger.LogWarning(ex, "Failed to write capture for {Url}.", responseUrl);
            }
        }

        try
        {
            // Everything that touches the driver lives inside this try so the
            // browser is always shut down, even if wiring up the listener fails.
            var network = driver.Manage().Network;
            network.NetworkResponseReceived += OnResponse;
            try
            {
                _logger.LogInformation("Navigating to {Url}", url);
                driver.Navigate().GoToUrl(url);
                await browseUntilDone();
            }
            finally
            {
                network.NetworkResponseReceived -= OnResponse;
            }
        }
        finally
        {
            driver.Quit();
        }

        return Volatile.Read(ref written);
    }

    // Turn a URL into a filesystem-safe, readable, length-capped file stem so the
    // captures are self-describing (the endpoint is visible in the filename).
    // Letters, digits, '-' and '.' are kept; every other character becomes '_'.
    private static string Sanitize(string url)
    {
        var trimmed = url
            .Replace("https://", "", StringComparison.OrdinalIgnoreCase)
            .Replace("http://", "", StringComparison.OrdinalIgnoreCase);

        var sb = new StringBuilder(trimmed.Length);
        foreach (var c in trimmed)
        {
            sb.Append(char.IsLetterOrDigit(c) || c is '-' or '.' ? c : '_');
        }

        var stem = sb.ToString();
        return stem.Length > 120 ? stem[..120] : stem;
    }
}

View File

@@ -2,11 +2,11 @@ namespace BlueLaminate.Scraper.Proxies;
/// <summary>
/// A concrete, ready-to-use proxy endpoint handed back by an
/// <see cref="IProxyProvider"/>. This is the only proxy type the browser layer
/// ever sees, so swapping providers (or mixing several in a grab-bag) never
/// touches the Selenium code. <see cref="Username"/> and <see cref="Password"/>
/// are the literal credentials to present to the gateway — for providers like
/// IPRoyal the targeting/session parameters are already baked into them.
/// <see cref="IProxyProvider"/>. This is the only proxy type a consumer ever
/// sees, so swapping providers (or mixing several in a grab-bag) never touches
/// the calling code. <see cref="Username"/> and <see cref="Password"/> are the
/// literal credentials to present to the gateway — for providers like IPRoyal
/// the targeting/session parameters are already baked into them.
/// </summary>
/// <param name="Host">Gateway host, e.g. "geo.iproyal.com".</param>
/// <param name="Port">Gateway port, e.g. 12321.</param>

View File

@@ -1,97 +0,0 @@
using System.Text.Json;
using BlueLaminate.Scraper.Browser;
using Microsoft.Extensions.Logging;
using OpenQA.Selenium;
namespace BlueLaminate.Scraper.Proxies;
/// <summary>
/// Where a proxy lease actually exits to the internet, as reported by
/// ipinfo.io. Every field is nullable.
/// </summary>
/// <param name="Ip">The exit IP address.</param>
/// <param name="City">City reported for the exit IP.</param>
/// <param name="Region">Region reported for the exit IP.</param>
/// <param name="Country">Country reported for the exit IP.</param>
/// <param name="Org">
/// ASN plus organisation, e.g. "AS7922 Comcast Cable". This is how to tell
/// residential from datacenter: a consumer ISP indicates a genuine residential
/// exit, whereas a hosting provider (OVH, Hetzner, AWS…) indicates a
/// datacenter address dressed up as one.
/// </param>
/// <param name="Hostname">Hostname reported for the exit IP.</param>
/// <param name="Timezone">Timezone reported for the exit IP.</param>
public sealed record ProxyExitInfo(
    string? Ip,
    string? City,
    string? Region,
    string? Country,
    string? Org,
    string? Hostname,
    string? Timezone);
/// <summary>
/// Minimal end-to-end smoke test for the proxy plumbing. It acquires a lease,
/// starts the real browser through it, loads an IP-echo endpoint and reads the
/// exit IP back. The round trip costs only a few KB, which makes it the right
/// first thing to run against a metered residential plan: it shows that auth
/// works and whether the IP is genuinely residential before any bandwidth is
/// spent on CSFloat.
/// </summary>
public sealed class ProxyProbe
{
    private const string IpEchoUrl = "https://ipinfo.io/json";

    // Case-insensitive matching between JSON keys and the record's property names.
    private static readonly JsonSerializerOptions JsonOptions = new JsonSerializerOptions
    {
        PropertyNameCaseInsensitive = true,
    };

    private readonly IProxyProvider _provider;
    private readonly BrowserDriverFactory _factory;
    private readonly ILogger<ProxyProbe> _logger;

    /// <summary>Creates the probe.</summary>
    /// <param name="provider">Hands out the proxy lease to test.</param>
    /// <param name="factory">Launches the browser through that lease.</param>
    /// <param name="logger">Receives the lease and exit-IP log lines.</param>
    public ProxyProbe(
        IProxyProvider provider,
        BrowserDriverFactory factory,
        ILogger<ProxyProbe> logger)
    {
        _provider = provider;
        _factory = factory;
        _logger = logger;
    }

    /// <summary>
    /// Acquires a lease for <paramref name="request"/>, loads the IP-echo page
    /// through it and returns the parsed exit details. The browser is shut
    /// down before returning, whether or not the probe succeeded.
    /// </summary>
    /// <param name="request">Passed to the proxy provider to acquire the lease.</param>
    /// <returns>The exit information parsed from the IP-echo page.</returns>
    /// <exception cref="InvalidOperationException">
    /// The page produced no text, contained no JSON object, or deserialized to null.
    /// </exception>
    public async Task<ProxyExitInfo> RunAsync(ProxyRequest request)
    {
        var lease = _provider.Acquire(request);

        var exitMode = lease.SessionId is null
            ? "rotating"
            : $"sticky:{lease.SessionId}";
        _logger.LogInformation("Acquired {Provider} lease (exit {Mode}).", lease.Provider, exitMode);

        var browser = await _factory.CreateAsync(lease, blockImages: true);
        try
        {
            browser.Manage().Timeouts().PageLoad = TimeSpan.FromSeconds(60);
            browser.Navigate().GoToUrl(IpEchoUrl);

            // Take the document's rendered text instead of walking the DOM, so
            // the browser's built-in JSON viewer doesn't interfere; the JSON
            // object is then cut out of that text.
            var scripting = (IJavaScriptExecutor)browser;
            var pageText = scripting.ExecuteScript("return document.documentElement.innerText;") as string;
            if (pageText is null)
            {
                throw new InvalidOperationException("Browser returned no page text.");
            }

            var json = ExtractJson(pageText);
            var exit = JsonSerializer.Deserialize<ProxyExitInfo>(json, JsonOptions);
            if (exit is null)
            {
                throw new InvalidOperationException("IP-echo response was empty.");
            }

            _logger.LogInformation(
                "Exit IP {Ip} — {City}, {Region}, {Country} — {Org}",
                exit.Ip, exit.City, exit.Region, exit.Country, exit.Org);

            return exit;
        }
        finally
        {
            browser.Quit();
        }
    }

    // Returns the span from the first '{' to the last '}' (inclusive), or
    // throws when the text holds no such pair in that order.
    private static string ExtractJson(string text)
    {
        var open = text.IndexOf('{');
        var close = text.LastIndexOf('}');

        var hasObject = open >= 0 && close > open;
        if (!hasObject)
        {
            throw new InvalidOperationException($"No JSON found in IP-echo response: {text}");
        }

        return text.Substring(open, close - open + 1);
    }
}