TagPostgreSQL

This is my process to improve the performance of my PostgreSQL database and queries

Every now and then I’ll run the queries below to help me figure out ways to optimize the queries my application is using, and to figure out if certain tables and views (relations) need to be optimized.

/* Show the last time at which every table and view was vacuumed and analyzed */
SELECT relname, last_vacuum, last_autovacuum, last_analyze, last_autoanalyze  
FROM pg_stat_all_tables  d_d
WHERE schemaname = 'public';  

/* See which queries are most popular */
SELECT * FROM pg_stat_statements where calls >=1000; /* replace 1000 with any integer, depending on how active your database is queried */
select pg_stat_statements_reset();

/* Kill a specific query */
select pg_terminate_backend(INTEGER); /* replace INTEGER with the process ID of a specific query */

/* Get the size of the database */
select pg_size_pretty(pg_database_size('DATABASE_NAME')); /* replace DATABASE_NAME with the name of your database */

/* Kill queries that are taking too long; replace DATABASE_NAME with the name of your database */
SELECT query, pg_terminate_backend(pid) 
    FROM pg_stat_activity 
    WHERE datname = 'DATABASE_NAME' 
      AND pid <> pg_backend_pid() 
      AND (state = 'active' 
      OR state = 'idle') 
      AND state_change < current_timestamp - INTERVAL '15' MINUTE; /* change 15 to any integer that you think means a query is taking too long */

/* See which queries are running */
select * from pg_stat_activity;
SELECT * FROM pg_stat_activity WHERE wait_event IS NOT NULL AND backend_type = 'client backend';

/* Find very large indexes and indexes that aren't used much */
WITH table_scans as ( 
    SELECT relid, 
        tables.idx_scan + tables.seq_scan as all_scans, 
        ( tables.n_tup_ins + tables.n_tup_upd + tables.n_tup_del ) as writes, 
                pg_relation_size(relid) as table_size 
        FROM pg_stat_user_tables as tables 
),
all_writes as ( 
    SELECT sum(writes) as total_writes 
    FROM table_scans 
),
indexes as ( 
    SELECT idx_stat.relid, idx_stat.indexrelid, 
        idx_stat.schemaname, idx_stat.relname as tablename, 
        idx_stat.indexrelname as indexname, 
        idx_stat.idx_scan,
        pg_relation_size(idx_stat.indexrelid) as index_bytes, 
        indexdef ~* 'USING btree' AS idx_is_btree 
    FROM pg_stat_user_indexes as idx_stat 
        JOIN pg_index 
            USING (indexrelid) 
        JOIN pg_indexes as indexes 
            ON idx_stat.schemaname = indexes.schemaname 
                AND idx_stat.relname = indexes.tablename 
                AND idx_stat.indexrelname = indexes.indexname 
    WHERE pg_index.indisunique = FALSE 
),
index_ratios AS ( 
SELECT schemaname, tablename, indexname, 
    idx_scan, all_scans,
    round(( CASE WHEN all_scans = 0 THEN 0.0::NUMERIC 
        ELSE idx_scan::NUMERIC/all_scans * 100 END),2) as index_scan_pct, 
    writes,
    round((CASE WHEN writes = 0 THEN idx_scan::NUMERIC ELSE idx_scan::NUMERIC/writes END),2) 
        as scans_per_write, 
    pg_size_pretty(index_bytes) as index_size, 
    pg_size_pretty(table_size) as table_size, 
    idx_is_btree, index_bytes
    FROM indexes 
    JOIN table_scans 
    USING (relid) 
),
index_groups AS ( 
SELECT 'Never Used Indexes' as reason, *, 1 as grp 
FROM index_ratios 
WHERE
    idx_scan = 0
    and idx_is_btree 
UNION ALL 
SELECT 'Low Scans, High Writes' as reason, *, 2 as grp 
FROM index_ratios 
WHERE
    scans_per_write <= 1
    and index_scan_pct < 10 
    and idx_scan > 0 
    and writes > 100 
    and idx_is_btree 
UNION ALL 
SELECT 'Seldom Used Large Indexes' as reason, *, 3 as grp 
FROM index_ratios 
WHERE
    index_scan_pct < 5
    and scans_per_write > 1 
    and idx_scan > 0 
    and idx_is_btree 
    and index_bytes > 100000000 
UNION ALL 
SELECT 'High-Write Large Non-Btree' as reason, index_ratios.*, 4 as grp 
FROM index_ratios, all_writes 
WHERE
    ( writes::NUMERIC / ( total_writes + 1 ) ) > 0.02 
    AND NOT idx_is_btree 
    AND index_bytes > 100000000 
ORDER BY grp, index_bytes DESC ) 
SELECT reason, schemaname, tablename, indexname, 
    index_scan_pct, scans_per_write, index_size, table_size
FROM index_groups; 

Pick the lowest and highest numbers in an array of numbers in PostgreSQL

I thought solving this problem took longer than it should have. I thought there would have been an integrated function in PostgreSQL to pick the lowest (smallest) and highest (largest) numbers in an ARRAY of numbers.

LEAST and GREATEST didn’t work, since those work on expressions, not arrays.

MIN and MAX don’t work because they are aggregating functions, and I didn’t want that.

Of course I found the solution on StackOverflow, but not after a lot of searching and trying other potential solutions.

Here it is!

Given an array of numbers, pick the lowest and highest ones using two custom functions.

CREATE OR REPLACE FUNCTION small(anyarray, int)

 RETURNS anyelement AS $$

  SELECT (ARRAY(SELECT unnest($1) ORDER BY 1 asc))[$2]

 $$ LANGUAGE sql;

The second argument in this function is to extract the Nth smallest number. In my case I want the smallest number so I set “1” for the second argument.

Example array in PostgreSQL:

{45.04,124.90,45.04,124.90}

Example query:

SELECT small(‘{45.04,124.90,45.04,124.90}’::numeric[], 1)

Output: 45.04

You can rewrite the query to select the Nth largest number by changing the “ORDER BY 1 asc” to “ORDER BY 1 desc” (reversing the order of the array’s unnesting.)

CREATE OR REPLACE FUNCTION small(anyarray, int)

RETURNS anyelement AS $$

SELECT (ARRAY(SELECT unnest($1) ORDER BY 1 asc))[$2]

$$ LANGUAGE sql;

Example array in PostgreSQL:

{45.04,124.90,45.04,124.90}

Example query:

SELECT large(‘{45.04,124.90,45.04,124.90}’::numeric[], 1)

Output: 124.9

© 2020 Steven Can Plan

Theme by Anders NorénUp ↑