By the end of this lab, you will be able to: - Use aggregate functions (COUNT, SUM, AVG, MAX, MIN) - Apply GROUP BY clauses effectively - Filter grouped results with HAVING - Understand COUNT() vs COUNT(DISTINCT) - Write complex queries with nested aggregates
This lab uses the Company-Product-Purchase database. Let's set it up first:
DROP TABLE IF EXISTS company;
DROP TABLE IF EXISTS Product;
DROP TABLE IF EXISTS Purchase;
CREATE TABLE Company (
cname CHAR(20) PRIMARY KEY,
city CHAR(20)
);
CREATE TABLE Product (
pname CHAR(20) ,
maker CHAR(20),
price DOUBLE,
FOREIGN KEY (maker) REFERENCES company(cname),
PRIMARY KEY (pname, maker)
);
CREATE TABLE Purchase
(id INT PRIMARY KEY,
product CHAR(20),
buyer VARCHAR(30),
FOREIGN KEY (product) REFERENCES Product(pname)
);
INSERT INTO Company VALUES('X co','Seattle');
INSERT INTO Company VALUES('Y co','Jeffersonville');
INSERT INTO Company VALUES('A co','Seattle');
INSERT INTO Company VALUES('A inc','LA');
INSERT INTO Company VALUES('B co','NYC');
INSERT INTO Product VALUES('X','X co',20);
INSERT INTO Product VALUES('X+','X co',10);
INSERT INTO Product VALUES('Y','Y co',5);
INSERT INTO Product VALUES('A','A co',8);
INSERT INTO Product VALUES('T','A co',18);
INSERT INTO Product VALUES('A','A inc',5);
INSERT INTO Product VALUES('AA','A co',30);
INSERT INTO Product VALUES('AAA','A co',20);
INSERT INTO Product VALUES('B','B co',8);
INSERT INTO Purchase VALUES(1,'A','Joe Blow');
INSERT INTO Purchase VALUES(2,'A','Joe Smith');
INSERT INTO Purchase VALUES(3,'A','John Oliver');
INSERT INTO Purchase VALUES(4,'A','John Oliver');
INSERT INTO Purchase VALUES(5,'Y','Joe Blow');
INSERT INTO Purchase VALUES(6,'X','Joe Blow');
INSERT INTO Purchase VALUES(7,'A','Joe Blow');
INSERT INTO Purchase VALUES(8,'A','Joe Blow');
INSERT INTO Purchase VALUES(9,'A','Joe Blow');
Cities where one can find companies that manufacture products bought by "Joe Blowβ.
SELECT c.city
FROM Company c
WHERE c.cname IN (
SELECT pr.maker
FROM Purchase p, Product pr
WHERE p.product = pr.pname
AND p.buyer = 'Joe Blow')
Note that we got Seattle twice for X co and A co
SELECT c.city
FROM Company c,
Product p,
Purchase pu
WHERE c.cname = p.maker
AND p.pname = pu.product
AND pu.buyer = 'Joe Blow'
Compare the previous two queries.
Find products that are more expensive than all those produced by βX coβ
SELECT pname
FROM Product
WHERE price > ALL(
SELECT price
FROM Product
WHERE maker = 'X co')
Find βcopycatβ products, i.e. products made by competitors with the same names as products made by βA coβ
SELECT p1.pname
FROM Product p1
WHERE p1.maker = 'A co'
AND EXISTS(
SELECT p2.pname
FROM Product p2
WHERE p2.maker <> 'A co'
AND p1.pname = p2.pname)
DROP TABLE Purchase;
CREATE TABLE Purchase (
product CHAR(20),
pdate DATE,
price DOUBLE,
quantity INT
);
INSERT INTO Purchase VALUES('bagel','2021-10-21',1,20);
INSERT INTO Purchase VALUES('banana','2021-10-3',0.5,10);
INSERT INTO Purchase VALUES('banana','2021-10-10',1,10);
INSERT INTO Purchase VALUES('bagel','2021-10-25',1.5,20);
Note that we do not have a primary key.
Get the number of purchases.
sql
SELECT COUNT(*) FROM purchase
Find the sales after β10/10/2021β
SELECT product,
SUM(price * quantity) AS TotalSales
FROM Purchase
WHERE pdate > '2021-10-10'
GROUP BY product
SELECT DISTINCT x.product,
(SELECT SUM(y.price*y.quantity)
FROM Purchase y WHERE x.product = y.product AND y.pdate > '2021-10-10') TotalSales FROM Purchase x WHERE x.pdate > '2021-10-10'
Same query as before, except that we consider only products that have more than 100 buyers
sql
SELECT product, SUM(price*quantity)
FROM Purchase
WHERE pdate > '2021-10-1'
GROUP BY product
HAVING SUM(quantity) > 100
Purchase
| Id | Product | Price | Quantity |
|---|---|---|---|
| 1 | bagel | 1.0 | 5 |
| 2 | banana | 0.5 | 6 |
| 3 | Juice | 1.0 | 4 |
| 4 | donuts | 1.5 | 8 |
| 5 | bagel | 1.5 | 7 |
| 6 | banana | 1.0 | 4 |
| 7 | banana | 1.0 | 3 |
| 8 | juice | 3.0 | 5 |
DROP TABLE IF EXISTS Purchase;
CREATE TABLE Purchase (
ID INT PRIMARY KEY AUTO_INCREMENT,
Product CHAR(20),
Price DECIMAL(10,2),
Quantity INT
);
INSERT INTO Purchase(Product, Price, Quantity) VALUES('bagel',1.0,5),
('banana',.5,6), ('juice',1.0,4),
('donuts', 1.5,8), ('bagel', 1.5,7),
('banana',1.0,4),('banana',1,3), ('juice',3.0,5)
;
NOTE: AUTO_INCREMENT, and how we insert into the table.
SELECT SUM(Price*Quantity) AS Total_sales
FROM Purchase
WHERE product='bagel'
| Id | Product | Price | Quantity |
|---|---|---|---|
| 1 | bagel | 1.0 | 5 |
| 5 | bagel | 1.5 | 7 |
| Price | X | Quantity |
|---|---|---|
| 1.0 | x | 5 |
| 1.5 | x | 7 |
| 15.5 |
Run the following two queries:
SELECT COUNT(*)
FROM Purchase;
SELECT COUNT(DISTINCT product)
FROM Purchase;
First query returns 8 while the second returns 4.
The following query finds the total number of units sold across all products.
SELECT SUM(Quantity)
FROM Purchase
The query returns 36. To find the number of units sold for each product.
SELECT Product, SUM(Quantity)
FROM Purchase
GROUP BY Product
Find products and sales that have sold more than 10 units.
SELECT Product, SUM(Quantity*Price)
FROM Purchase
GROUP BY Product
HAVING SUM(Quantity)> 10
| Product | SUM(Quantity*Price) |
|---|---|
| bagel | 15.50 |
| banana | 10.00 |
Find the product with the most units sold.
SELECT Product, SUM(Quantity*Price)
FROM Purchase
GROUP BY Product
HAVING SUM(Quantity)= (
SELECT MAX(t.units)
FROM
(SELECT SUM(Quantity) AS units
FROM Purchase
GROUP BY Product) t
);
To understand the query, let us start with:
SELECT SUM(Quantity)
FROM Purchase
GROUP BY Product
SELECT MAX(t.units)
FROM (SELECT SUM(Quantity) AS units
FROM Purchase
GROUP BY Product) t
The query above finds the maximum of the total unit sold.
Run the following queries, to create the schema:
DROP TABLE IF EXISTS product;
DROP TABLE IF EXISTS supplier;
CREATE TABLE supplier(
id INT,
sname VARCHAR(30) NOT NULL,
PRIMARY KEY (id),
UNIQUE(sname)
);
CREATE TABLE product(
id INT PRIMARY KEY,
pname VARCHAR(30) NOT NULL,
price FLOAT,
supplierid INT,
FOREIGN KEY (supplierid) REFERENCES supplier(id)
);
Insert Data
INSERT INTO supplier(id, sname) VALUES (1, 'bike Ltd');
INSERT INTO supplier(id, sname) VALUES (2, 'bike USA');
INSERT INTO supplier(id, sname) VALUES (3, 'bikes4all');
INSERT INTO product VALUES (1,'girl bike', 110, 1);
INSERT INTO product VALUES (2,'boy bike', 110, 2);
INSERT INTO product VALUES (3,'mountain bike', 510, 2);
INSERT INTO product VALUES (4,'bike', 180, NULL );
INSERT INTO product VALUES (5,'ebike', 280, NULL );
Try to insert the following
INSERT INTO supplier(id, sname) VALUES (4, 'bike Ltd');
The previous SQL insert should not work, as we have specified that the supplier name to be unique.
Q1:
Find all products with unknown supplier.
A supplier of a product is recorded in product table to find out products with unknown supplier, we need to find those that are NULL.
SELECT * FROM product WHERE supplierid IS NULL
Q2: Find all suppliers with no product listed.
SELECT * FROM Supplier s
WHERE NOT EXISTS (SELECT * FROM product p
WHERE p.supplierid=s.id)
SELECT *
FROM supplier s, Product p
WHERE s.id = p.supplierid
select * from product p
join supplier s
on p.supplierid=s.id;
SELECT * FROM product p
LEFT OUTER JOIN supplier s
ON p.supplierid=s.id;
Q4: Find supplier (along with the supplied products)
SELECT * FROM product p
RIGHT OUTER JOIN supplier s
ON p.supplierid=s.id;
Q5 Find supplier and products including not matched
SELECT * FROM product p
full OUTER JOIN supplier s
ON p.supplierid=s.id;
SELECT * FROM product p
LEFT OUTER JOIN supplier s
ON p.supplierid=s.id
UNION
SELECT * FROM product p
RIGHT OUTER JOIN supplier s
ON p.supplierid=s.id;
Analyze sales data to find total revenue and average order value by month and product category.
-- Sample schema for sales analysis
CREATE TABLE Sales (
sale_id INT PRIMARY KEY,
product_name VARCHAR(100),
category VARCHAR(50),
sale_date DATE,
quantity INT,
unit_price DECIMAL(10,2)
);
-- Query: Monthly sales summary by category
SELECT
YEAR(sale_date) AS year,
MONTH(sale_date) AS month,
category,
COUNT(*) AS total_transactions,
SUM(quantity) AS total_units_sold,
SUM(quantity * unit_price) AS total_revenue,
AVG(quantity * unit_price) AS avg_transaction_value,
MIN(unit_price) AS min_price,
MAX(unit_price) AS max_price
FROM Sales
GROUP BY YEAR(sale_date), MONTH(sale_date), category
HAVING total_revenue > 1000
ORDER BY year DESC, month DESC, total_revenue DESC;
Calculate grade distribution and class statistics for different courses.
-- Grade analysis example
CREATE TABLE StudentGrades (
student_id INT,
course_name VARCHAR(100),
grade DECIMAL(5,2),
semester VARCHAR(20)
);
-- Query: Course statistics with grade distribution
SELECT
course_name,
COUNT(DISTINCT student_id) AS num_students,
AVG(grade) AS avg_grade,
MIN(grade) AS min_grade,
MAX(grade) AS max_grade,
STDDEV(grade) AS grade_std_dev,
SUM(CASE WHEN grade >= 90 THEN 1 ELSE 0 END) AS A_count,
SUM(CASE WHEN grade >= 80 AND grade < 90 THEN 1 ELSE 0 END) AS B_count,
SUM(CASE WHEN grade >= 70 AND grade < 80 THEN 1 ELSE 0 END) AS C_count,
SUM(CASE WHEN grade < 70 THEN 1 ELSE 0 END) AS below_C_count
FROM StudentGrades
GROUP BY course_name
HAVING num_students >= 10
ORDER BY avg_grade DESC;
Track inventory levels and identify products that need reordering.
-- Inventory analysis
CREATE TABLE Inventory (
product_id INT,
warehouse VARCHAR(50),
quantity_on_hand INT,
reorder_level INT,
unit_cost DECIMAL(10,2),
last_updated DATE
);
-- Query: Inventory summary by warehouse with reorder alerts
SELECT
warehouse,
COUNT(DISTINCT product_id) AS total_products,
SUM(quantity_on_hand) AS total_units,
SUM(quantity_on_hand * unit_cost) AS inventory_value,
AVG(quantity_on_hand) AS avg_stock_level,
COUNT(CASE WHEN quantity_on_hand <= reorder_level THEN 1 END) AS products_to_reorder,
SUM(CASE WHEN quantity_on_hand <= reorder_level
THEN (reorder_level - quantity_on_hand) * unit_cost
ELSE 0 END) AS reorder_cost
FROM Inventory
GROUP BY warehouse
ORDER BY inventory_value DESC;
Analyze customer purchasing patterns with aggregations.
-- Customer behavior analysis
CREATE TABLE CustomerPurchases (
customer_id INT,
customer_name VARCHAR(100),
purchase_date DATE,
product_category VARCHAR(50),
purchase_amount DECIMAL(10,2)
);
-- Query: Customer segmentation based on purchase behavior
SELECT
customer_id,
customer_name,
COUNT(*) AS total_purchases,
COUNT(DISTINCT product_category) AS categories_purchased,
SUM(purchase_amount) AS total_spent,
AVG(purchase_amount) AS avg_purchase_amount,
MIN(purchase_date) AS first_purchase,
MAX(purchase_date) AS last_purchase,
DATEDIFF(MAX(purchase_date), MIN(purchase_date)) AS customer_lifetime_days,
CASE
WHEN SUM(purchase_amount) > 10000 THEN 'VIP'
WHEN SUM(purchase_amount) > 5000 THEN 'Gold'
WHEN SUM(purchase_amount) > 1000 THEN 'Silver'
ELSE 'Bronze'
END AS customer_tier
FROM CustomerPurchases
GROUP BY customer_id, customer_name
HAVING total_purchases >= 3
ORDER BY total_spent DESC;
COUNT(*) counts all rows, while COUNT(column) counts non-NULL valuesCOUNT(DISTINCT column) counts unique values onlyGROUP BY to create subgroups for aggregationHAVING filters groups after aggregation (unlike WHERE which filters before)CASE statements within aggregates for conditional counting/summing