diff --git a/.server-changes/clickhouse-reader-url.md b/.server-changes/clickhouse-reader-url.md new file mode 100644 index 00000000000..8e23a51f3ee --- /dev/null +++ b/.server-changes/clickhouse-reader-url.md @@ -0,0 +1,6 @@ +--- +area: webapp +type: improvement +--- + +Add `CLICKHOUSE_READER_URL` to route ClickHouse reads to a read replica while writes stay on `CLICKHOUSE_URL`. Optional; defaults to `CLICKHOUSE_URL`. diff --git a/.server-changes/events-reader-clickhouse-url.md b/.server-changes/events-reader-clickhouse-url.md new file mode 100644 index 00000000000..3290ae629da --- /dev/null +++ b/.server-changes/events-reader-clickhouse-url.md @@ -0,0 +1,6 @@ +--- +area: webapp +type: improvement +--- + +Add `EVENTS_READER_CLICKHOUSE_URL` to send trace/span/log reads to a read replica while event inserts stay on `EVENTS_CLICKHOUSE_URL`. Optional; unset keeps reads and writes on the same instance. diff --git a/.server-changes/runs-list-clickhouse-url.md b/.server-changes/runs-list-clickhouse-url.md new file mode 100644 index 00000000000..b0640fb34bf --- /dev/null +++ b/.server-changes/runs-list-clickhouse-url.md @@ -0,0 +1,6 @@ +--- +area: webapp +type: improvement +--- + +Add `RUNS_LIST_CLICKHOUSE_URL` to send runs list queries to a separate ClickHouse instance. Defaults to `CLICKHOUSE_READER_URL` if set, otherwise `CLICKHOUSE_URL`. diff --git a/apps/webapp/app/env.server.ts b/apps/webapp/app/env.server.ts index d9c97711940..df85b4c2116 100644 --- a/apps/webapp/app/env.server.ts +++ b/apps/webapp/app/env.server.ts @@ -1627,6 +1627,11 @@ const EnvironmentSchema = z // Clickhouse CLICKHOUSE_URL: z.string(), + // Optional read replica endpoint. Read-only clients (logs, query, admin, runsList, + // engine, realtime) default to this when their own URL is unset; writes always stay on + // CLICKHOUSE_URL. The events client does not follow it: its read path is split only via + // EVENTS_READER_CLICKHOUSE_URL. Must share storage with the CLICKHOUSE_URL warehouse. 
+ CLICKHOUSE_READER_URL: z.string().optional(), CLICKHOUSE_KEEP_ALIVE_ENABLED: z.string().default("1"), CLICKHOUSE_KEEP_ALIVE_IDLE_SOCKET_TTL_MS: z.coerce.number().int().optional(), CLICKHOUSE_MAX_OPEN_CONNECTIONS: z.coerce.number().int().default(10), @@ -1653,13 +1658,13 @@ const EnvironmentSchema = z LOGS_CLICKHOUSE_URL: z .string() .optional() - .transform((v) => v ?? process.env.CLICKHOUSE_URL), + .transform((v) => v ?? process.env.CLICKHOUSE_READER_URL ?? process.env.CLICKHOUSE_URL), // Query page ClickHouse limits (for TSQL queries) QUERY_CLICKHOUSE_URL: z .string() .optional() - .transform((v) => v ?? process.env.CLICKHOUSE_URL), + .transform((v) => v ?? process.env.CLICKHOUSE_READER_URL ?? process.env.CLICKHOUSE_URL), QUERY_CLICKHOUSE_MAX_EXECUTION_TIME: z.coerce.number().int().default(10), QUERY_CLICKHOUSE_MAX_MEMORY_USAGE: z.coerce.number().int().default(1_073_741_824), // 1GB in bytes QUERY_CLICKHOUSE_MAX_AST_ELEMENTS: z.coerce.number().int().default(4_000_000), @@ -1678,12 +1683,14 @@ const EnvironmentSchema = z ADMIN_CLICKHOUSE_URL: z .string() .optional() - .transform((v) => v ?? process.env.CLICKHOUSE_URL), + .transform((v) => v ?? process.env.CLICKHOUSE_READER_URL ?? process.env.CLICKHOUSE_URL), EVENTS_CLICKHOUSE_URL: z .string() .optional() .transform((v) => v ?? process.env.CLICKHOUSE_URL), + // Events read replica (traces/spans/logs). No CLICKHOUSE_READER_URL fallback by design: this write-capable client opts in explicitly. + EVENTS_READER_CLICKHOUSE_URL: z.string().optional(), EVENTS_CLICKHOUSE_KEEP_ALIVE_ENABLED: z.string().default("1"), EVENTS_CLICKHOUSE_KEEP_ALIVE_IDLE_SOCKET_TTL_MS: z.coerce.number().int().optional(), EVENTS_CLICKHOUSE_MAX_OPEN_CONNECTIONS: z.coerce.number().int().default(10), @@ -1696,7 +1703,7 @@ const EnvironmentSchema = z RUN_ENGINE_CLICKHOUSE_URL: z .string() .optional() - .transform((v) => v ?? process.env.CLICKHOUSE_URL), + .transform((v) => v ?? process.env.CLICKHOUSE_READER_URL ?? 
process.env.CLICKHOUSE_URL), RUN_ENGINE_CLICKHOUSE_KEEP_ALIVE_ENABLED: z.string().default("1"), RUN_ENGINE_CLICKHOUSE_KEEP_ALIVE_IDLE_SOCKET_TTL_MS: z.coerce.number().int().optional(), RUN_ENGINE_CLICKHOUSE_MAX_OPEN_CONNECTIONS: z.coerce.number().int().default(5), @@ -1708,7 +1715,7 @@ const EnvironmentSchema = z REALTIME_BACKEND_NATIVE_CLICKHOUSE_URL: z .string() .optional() - .transform((v) => v ?? process.env.CLICKHOUSE_URL), + .transform((v) => v ?? process.env.CLICKHOUSE_READER_URL ?? process.env.CLICKHOUSE_URL), REALTIME_BACKEND_NATIVE_CLICKHOUSE_KEEP_ALIVE_ENABLED: z.string().default("1"), REALTIME_BACKEND_NATIVE_CLICKHOUSE_KEEP_ALIVE_IDLE_SOCKET_TTL_MS: z.coerce.number().int().optional(), REALTIME_BACKEND_NATIVE_CLICKHOUSE_MAX_OPEN_CONNECTIONS: z.coerce.number().int().default(10), @@ -1716,6 +1723,20 @@ const EnvironmentSchema = z .enum(["log", "error", "warn", "info", "debug"]) .default("info"), REALTIME_BACKEND_NATIVE_CLICKHOUSE_COMPRESSION_REQUEST: z.string().default("1"), + // Dedicated ClickHouse pool for the runs list (dashboard + API). Lets us point + // the highest-traffic read path at a read replica without moving ingest/replication + // writes off CLICKHOUSE_URL. Falls back to CLICKHOUSE_READER_URL, then CLICKHOUSE_URL, when unset. + RUNS_LIST_CLICKHOUSE_URL: z + .string() + .optional() + .transform((v) => v ?? process.env.CLICKHOUSE_READER_URL ?? 
process.env.CLICKHOUSE_URL), + RUNS_LIST_CLICKHOUSE_KEEP_ALIVE_ENABLED: z.string().default("1"), + RUNS_LIST_CLICKHOUSE_KEEP_ALIVE_IDLE_SOCKET_TTL_MS: z.coerce.number().int().optional(), + RUNS_LIST_CLICKHOUSE_MAX_OPEN_CONNECTIONS: z.coerce.number().int().default(10), + RUNS_LIST_CLICKHOUSE_LOG_LEVEL: z + .enum(["log", "error", "warn", "info", "debug"]) + .default("info"), + RUNS_LIST_CLICKHOUSE_COMPRESSION_REQUEST: z.string().default("1"), EVENTS_CLICKHOUSE_BATCH_SIZE: z.coerce.number().int().default(1000), EVENTS_CLICKHOUSE_FLUSH_INTERVAL_MS: z.coerce.number().int().default(1000), METRICS_CLICKHOUSE_BATCH_SIZE: z.coerce.number().int().default(10000), diff --git a/apps/webapp/app/presenters/v3/ApiRunListPresenter.server.ts b/apps/webapp/app/presenters/v3/ApiRunListPresenter.server.ts index 0e7077b3dfc..794e47bf0ea 100644 --- a/apps/webapp/app/presenters/v3/ApiRunListPresenter.server.ts +++ b/apps/webapp/app/presenters/v3/ApiRunListPresenter.server.ts @@ -269,7 +269,7 @@ export class ApiRunListPresenter extends BasePresenter { options.machines = searchParams["filter[machine]"]; } - const clickhouse = await clickhouseFactory.getClickhouseForOrganization(organizationId, "standard"); + const clickhouse = await clickhouseFactory.getClickhouseForOrganization(organizationId, "runsList"); const presenter = new NextRunListPresenter(this._replica, clickhouse); logger.debug("Calling RunListPresenter", { options }); diff --git a/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.runs.$runParam/route.tsx b/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.runs.$runParam/route.tsx index fbe4b9046c6..b2c495474f8 100644 --- a/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.runs.$runParam/route.tsx +++ b/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.runs.$runParam/route.tsx @@ -190,7 +190,7 @@ async function 
getRunsListFromTableState({ return null; } - const clickhouse = await clickhouseFactory.getClickhouseForOrganization(project.organizationId, "standard"); + const clickhouse = await clickhouseFactory.getClickhouseForOrganization(project.organizationId, "runsList"); const runsListPresenter = new NextRunListPresenter($replica, clickhouse); const currentPageResult = await runsListPresenter.call(project.organizationId, environment.id, { userId, diff --git a/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.runs._index/route.tsx b/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.runs._index/route.tsx index 25cccaede8e..28ffc6913c0 100644 --- a/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.runs._index/route.tsx +++ b/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.runs._index/route.tsx @@ -95,7 +95,7 @@ export const loader = async ({ request, params }: LoaderFunctionArgs) => { const clickhouse = await clickhouseFactory.getClickhouseForOrganization( project.organizationId, - "standard" + "runsList" ); const presenter = new NextRunListPresenter($replica, clickhouse); const list = presenter.call(project.organizationId, environment.id, { diff --git a/apps/webapp/app/routes/resources.orgs.$organizationSlug.projects.$projectParam.env.$envParam.runs.children-statuses.ts b/apps/webapp/app/routes/resources.orgs.$organizationSlug.projects.$projectParam.env.$envParam.runs.children-statuses.ts index 896dd25dd79..920301f1662 100644 --- a/apps/webapp/app/routes/resources.orgs.$organizationSlug.projects.$projectParam.env.$envParam.runs.children-statuses.ts +++ b/apps/webapp/app/routes/resources.orgs.$organizationSlug.projects.$projectParam.env.$envParam.runs.children-statuses.ts @@ -67,7 +67,7 @@ export async function loader({ request, params }: LoaderFunctionArgs) { const clickhouse = await 
clickhouseFactory.getClickhouseForOrganization( project.organizationId, - "standard" + "runsList" ); const runsRepository = new RunsRepository({ clickhouse, prisma: $replica }); diff --git a/apps/webapp/app/routes/resources.orgs.$organizationSlug.projects.$projectParam.env.$envParam.runs.live.ts b/apps/webapp/app/routes/resources.orgs.$organizationSlug.projects.$projectParam.env.$envParam.runs.live.ts index 616aa728872..0da28c4538b 100644 --- a/apps/webapp/app/routes/resources.orgs.$organizationSlug.projects.$projectParam.env.$envParam.runs.live.ts +++ b/apps/webapp/app/routes/resources.orgs.$organizationSlug.projects.$projectParam.env.$envParam.runs.live.ts @@ -34,7 +34,7 @@ export async function loader({ request, params }: LoaderFunctionArgs) { const clickhouse = await clickhouseFactory.getClickhouseForOrganization( project.organizationId, - "standard" + "runsList" ); const runsRepository = new RunsRepository({ clickhouse, prisma: $replica }); diff --git a/apps/webapp/app/services/clickhouse/clickhouseFactory.server.ts b/apps/webapp/app/services/clickhouse/clickhouseFactory.server.ts index 7c20dd3a2a5..69828708f8b 100644 --- a/apps/webapp/app/services/clickhouse/clickhouseFactory.server.ts +++ b/apps/webapp/app/services/clickhouse/clickhouseFactory.server.ts @@ -242,6 +242,36 @@ function initializeRealtimeClickhouseClient(): ClickHouse { }); } +/** Runs list reads — dashboard + API (`RUNS_LIST_CLICKHOUSE_URL`); + * falls back to the default client if unset. 
*/ +const defaultRunsListClickhouseClient = singleton( + "runsListClickhouseClient", + initializeRunsListClickhouseClient +); + +function initializeRunsListClickhouseClient(): ClickHouse { + if (!env.RUNS_LIST_CLICKHOUSE_URL) { + return defaultClickhouseClient; + } + + const url = new URL(env.RUNS_LIST_CLICKHOUSE_URL); + url.searchParams.delete("secure"); + + return new ClickHouse({ + url: url.toString(), + name: "runs-list-clickhouse", + keepAlive: { + enabled: env.RUNS_LIST_CLICKHOUSE_KEEP_ALIVE_ENABLED === "1", + idleSocketTtl: env.RUNS_LIST_CLICKHOUSE_KEEP_ALIVE_IDLE_SOCKET_TTL_MS, + }, + logLevel: env.RUNS_LIST_CLICKHOUSE_LOG_LEVEL, + compression: { + request: env.RUNS_LIST_CLICKHOUSE_COMPRESSION_REQUEST === "1", + }, + maxOpenConnections: env.RUNS_LIST_CLICKHOUSE_MAX_OPEN_CONNECTIONS, + }); +} + /** Task events (`EVENTS_CLICKHOUSE_URL`); not exported — accessed via factory. */ const defaultEventsClickhouseClient = singleton( "eventsClickhouseClient", @@ -253,12 +283,10 @@ function initializeEventsClickhouseClient(): ClickHouse { throw new Error("EVENTS_CLICKHOUSE_URL is not set"); } - const url = new URL(env.EVENTS_CLICKHOUSE_URL); - url.searchParams.delete("secure"); + const writerUrl = new URL(env.EVENTS_CLICKHOUSE_URL); + writerUrl.searchParams.delete("secure"); - return new ClickHouse({ - url: url.toString(), - name: "task-events", + const commonConfig = { keepAlive: { enabled: env.EVENTS_CLICKHOUSE_KEEP_ALIVE_ENABLED === "1", idleSocketTtl: env.EVENTS_CLICKHOUSE_KEEP_ALIVE_IDLE_SOCKET_TTL_MS, @@ -268,6 +296,28 @@ function initializeEventsClickhouseClient(): ClickHouse { request: env.EVENTS_CLICKHOUSE_COMPRESSION_REQUEST === "1", }, maxOpenConnections: env.EVENTS_CLICKHOUSE_MAX_OPEN_CONNECTIONS, + }; + + // Mixed read+write client: split reads to its own EVENTS_READER_CLICKHOUSE_URL (not the global reader) so inserts can never hit the replica. 
+ if (env.EVENTS_READER_CLICKHOUSE_URL) { + const readerUrl = new URL(env.EVENTS_READER_CLICKHOUSE_URL); + readerUrl.searchParams.delete("secure"); + + if (readerUrl.toString() !== writerUrl.toString()) { + return new ClickHouse({ + ...commonConfig, + writerName: "task-events-writer", + writerUrl: writerUrl.toString(), + readerName: "task-events-reader", + readerUrl: readerUrl.toString(), + }); + } + } + + return new ClickHouse({ + ...commonConfig, + name: "task-events", + url: writerUrl.toString(), }); } @@ -289,7 +339,8 @@ export type ClientType = | "query" | "admin" | "engine" - | "realtime"; + | "realtime" + | "runsList"; function buildOrgClickhouseClient(url: string, clientType: ClientType): ClickHouse { const parsed = new URL(url); @@ -379,6 +430,7 @@ function buildOrgClickhouseClient(url: string, clientType: ClientType): ClickHou case "standard": case "query": case "admin": + case "runsList": return new ClickHouse({ url: parsed.toString(), name, @@ -446,6 +498,8 @@ export class ClickhouseFactory { return defaultRunEngineClickhouseClient; case "realtime": return defaultRealtimeClickhouseClient; + case "runsList": + return defaultRunsListClickhouseClient; } }