--Create Table (Rowstore First)
CREATE TABLE sales
(
    sale_id     BIGINT IDENTITY(1,1),
    product_id  INT,
    customer_id INT,
    amount      DECIMAL(10,2),
    quantity    INT,
    order_date  DATE,
    region      VARCHAR(20)
);
--Generate Sample Data (Imitation)
--Fast bulk generation using system tables:

INSERT INTO sales (product_id, customer_id, amount, quantity, order_date, region)
SELECT 
    ABS(CHECKSUM(NEWID())) % 1000,         -- product_id
    ABS(CHECKSUM(NEWID())) % 50000,        -- customer_id
    CAST(ABS(CHECKSUM(NEWID())) % 10000 / 10.0 AS DECIMAL(10,2)), -- amount
    ABS(CHECKSUM(NEWID())) % 10 + 1,       -- quantity
    DATEADD(DAY, -ABS(CHECKSUM(NEWID())) % 365, GETDATE()), -- last 1 year
    CASE ABS(CHECKSUM(NEWID())) % 5
        WHEN 0 THEN 'US'
        WHEN 1 THEN 'EU'
        WHEN 2 THEN 'ASIA'
        WHEN 3 THEN 'AFRICA'
        ELSE 'OTHER'
    END
FROM sys.all_objects a
CROSS JOIN sys.all_objects b;

--This produces hundreds of thousands to millions of rows
--Baseline Query (Rowstore)
SELECT region, SUM(amount) AS total_amount
FROM sales
WHERE order_date >= '2025-01-01'
GROUP BY region;

--Typical behavior:
--Full table scan
--Row-by-row processing
--Higher I/O
--Add Columnstore Index
--Option A — Clustered (DW style)
CREATE CLUSTERED COLUMNSTORE INDEX cci_sales ON sales;

/*
Table becomes columnstore
Data compressed
Batch mode enabled
Option B — Nonclustered (Hybrid)
*/
CREATE NONCLUSTERED COLUMNSTORE INDEX ncci_sales
ON sales (amount, quantity, order_date, region);

--Rowstore remains, columnstore added for analytics
--Run Same Query Again
SELECT region, SUM(amount) AS total_amount
FROM sales
WHERE order_date >= '2025-01-01'
GROUP BY region;
/*What Changes Internally
Before (Rowstore)
Reads all columns
Row-by-row execution
High logical reads
After (Columnstore)

SQL Server does:

Reads only:
amount
order_date
region
Uses:
Batch mode execution
Segment elimination
Compressed data scan
?? 7. Example Analytical Queries
?? Aggregation
*/
SELECT SUM(amount) 
FROM sales;

--Only amount column scanned ? very fast

--Filter + Aggregation
SELECT SUM(amount)
FROM sales
WHERE region = 'US';

SELECT 
    region,
    COUNT(*) AS orders,
    SUM(amount) AS revenue
FROM sales
GROUP BY region;

SELECT 
    YEAR(order_date) AS yr,
    MONTH(order_date) AS mn,
    SUM(amount) AS revenue
FROM sales
GROUP BY YEAR(order_date), MONTH(order_date)
ORDER BY yr, mn;

--Check Columnstore Internals
SELECT *
FROM sys.dm_db_column_store_row_group_physical_stats
WHERE object_id = OBJECT_ID('sales');