I was wondering if any of you could help me to improve this query
SELECT IF(cases.country_region LIKE '%Korea%', 'South Korea', IF(upper(cases.country_region) = 'IRAN (ISLAMIC REPUBLIC OF)', 'Iran',
IF(upper(cases.country_region) = 'REPUBLIC OF IRELAND', 'IRELAND',
IF(cases.country_region = 'United Kingdom', 'UK', IF(upper(cases.country_region) = 'REPUBLIC OF MOLDOVA', 'MOLDOVA', cases.country_region))))) as country, (SUM(cases.latitude)/COUNT(cases.latitude)) as latitude,
(SUM(cases.longitude)/COUNT(cases.longitude)) as longitude, SUM(case when cases.confirmed is null then 0 else cases.confirmed end) as total_confirmed,
SUM(case when cases.deaths is null then 0 else cases.deaths end) as total_deaths, SUM(case when cases.recovered is null then 0 else cases.recovered end) as total_recovered,
SUM(case when cases.active is null then 0 else cases.active end) as total_active_cases, MAX(cases.date) as last_update
FROM
`bigquery-public-data.covid19_jhu_csse.summary` cases
INNER JOIN (
SELECT c.country_region, MAX(c.date) as maxdate
FROM `bigquery-public-data.covid19_jhu_csse.summary` c
WHERE c.date <= '2020-05-07'
GROUP BY c.country_region
) lcases ON cases.country_region = lcases.country_region AND cases.date = lcases.maxdate
GROUP BY country
HAVING total_confirmed > 0
ORDER BY total_confirmed desc;
I don't really know if there is any way to simplify the first IF(case) part.
If someone has any idea please comment below! Thank you very much!
The following recommendations will help you in your SQL tuning process.
You'll find 3 sections below:
ALTER TABLE `bigquery-public-data.covid19_jhu_csse.summary` ADD INDEX `bigquerypublicda_idx_date` (`date`);
ALTER TABLE `bigquery-public-data.covid19_jhu_csse.summary` ADD INDEX `bigquerypublicda_idx_country_region_date` (`country_region`,`date`);
SELECT
IF(cases.country_region LIKE '%Korea%',
'South Korea',
IF(upper(cases.country_region) = 'IRAN (ISLAMIC REPUBLIC OF)',
'Iran',
IF(upper(cases.country_region) = 'REPUBLIC OF IRELAND',
'IRELAND',
IF(cases.country_region = 'United Kingdom',
'UK',
IF(upper(cases.country_region) = 'REPUBLIC OF MOLDOVA',
'MOLDOVA',
cases.country_region))))) AS country,
(SUM(cases.latitude) / COUNT(cases.latitude)) AS latitude,
(SUM(cases.longitude) / COUNT(cases.longitude)) AS longitude,
SUM(CASE
WHEN cases.confirmed IS NULL THEN 0
ELSE cases.confirmed END) AS total_confirmed,
SUM(CASE
WHEN cases.deaths IS NULL THEN 0
ELSE cases.deaths END) AS total_deaths,
SUM(CASE
WHEN cases.recovered IS NULL THEN 0
ELSE cases.recovered END) AS total_recovered,
SUM(CASE
WHEN cases.active IS NULL THEN 0
ELSE cases.active END) AS total_active_cases,
MAX(cases.date) AS last_update
FROM
`bigquery-public-data.covid19_jhu_csse.summary` cases
INNER JOIN
(
SELECT
c.country_region,
MAX(c.date) AS maxdate
FROM
`bigquery-public-data.covid19_jhu_csse.summary` c
WHERE
c.date <= '2020-05-07'
GROUP BY
c.country_region
ORDER BY
NULL
) lcases
ON cases.country_region = lcases.country_region
AND cases.date = lcases.maxdate
GROUP BY
country
HAVING
total_confirmed > 0
ORDER BY
total_confirmed DESC