here is the CASE / CAST / SUBSTRING performance cost on a data set with about 200 million records and a windows system with 64gb of ram. this might help others as well..
> # no conversion commands
> system.time( dbGetQuery( db , "SELECT COUNT(*) , some_date AS some_month FROM some_table GROUP BY some_month" ) )
user system elapsed
0.03 0.00 7.94
>
> # SUBSTRING only
> system.time( dbGetQuery( db , "SELECT COUNT(*) , SUBSTRING( some_date , 1 , 1 ) AS some_month FROM some_table GROUP BY some_month" ) )
user system elapsed
0.00 0.00 16.39
>
> # CAST only
> system.time( dbGetQuery( db , "SELECT COUNT(*) , CAST( some_date AS VARCHAR(255) ) AS some_month FROM some_table GROUP BY some_month" ) )
user system elapsed
0.02 0.00 35.58
>
> # CASE only
> system.time( dbGetQuery( db , "SELECT COUNT(*) , ( CASE WHEN ( some_date IS NULL ) THEN 'hello' ELSE some_date END ) AS some_month FROM some_table GROUP BY some_month" ) )
user system elapsed
0.02 0.00 8.73
>
> # CAST + SUBSTRING
> system.time( dbGetQuery( db , "SELECT COUNT(*) , CAST( SUBSTRING( some_date , 1 , 1 ) AS DOUBLE ) AS some_month FROM some_table GROUP BY some_month" ) )
user system elapsed
0.0 0.0 34.5
>
> # CASE + SUBSTRING
> system.time( dbGetQuery( db , "SELECT COUNT(*) , ( CASE WHEN SUBSTRING( some_date , 2 , 1 ) = '/' THEN SUBSTRING( some_date , 1 , 1 ) ELSE SUBSTRING( some_date , 1 , 2 ) END ) AS some_month FROM some_table GROUP BY some_month" ) )
user system elapsed
0.00 0.00 51.49
>
> # CASE + CAST + SUBSTRING
> system.time( dbGetQuery( db , "SELECT COUNT(*) , CAST( CASE WHEN SUBSTRING( some_date , 2 , 1 ) = '/' THEN SUBSTRING( some_date , 1 , 1 ) ELSE SUBSTRING( some_date , 1 , 2 ) END AS DOUBLE ) AS some_month FROM some_table GROUP BY some_month" ) )
user system elapsed
0.00 0.00 73.03