#!/usr/bin/perl -w # # hm: generate a nice rainbow color heatmap from (x, y, z) numeric data # # Implementation: uses R filled.contour to render the image. # converts (x,y,z) data (sparse matrix) to dense matrix and adds the # needed axis-scales for x, and y. # # Almost every parameter can be overriden from the command line # Call without parameters to get a usage message # # Requires: # R # ImageMagick # # Copyright (c) Ariel Faigon, 2007-2008 # Released under the GNU General License (GPL) v3 # # $Id: hm,v 1.26 2008/09/25 19:42:40 ariel Exp $ # use strict; use Getopt::Std; use vars qw($opt_v $opt_V $opt_b $opt_s $opt_D $opt_i $opt_S); # Needs ImageMagick 'display' utility, feel free to use another viewer my $DisplayProg = 'display'; my $Rverbosity; my %ColorScheme = ( # This is still too restricted # Really need to expose all R palette capabilities 'rainbow.colors' => 'Rainbow: Deep Violet .. Bright Red (default)', 'brainbow.colors' => 'BlueRainbow: Deep Blue .. Bright Red (default)', 'ycrainbow.colors' => 'YCenteredRainbow: ditto, but yellow centered', 'terrain.colors' => 'Terrain/Topographical: Green .. Brown, White', 'topo.colors' => 'Violet, Blue, Green, Yellow, Brown, White', 'grey.colors' => 'Grayscale: dark grey .. light grey', 'heat.colors' => 'Red, Orange, Yellow, White', 'cm.colors' => 'Light Blue .. Light Pink', 'danger.colors' => 'Green .. Red (sub-slice of rainbow)', ); my $DefaultColorScheme = 'rainbow.colors'; # # Supported command line parameters + their default settings # To change the default, pass 'varname=value' on the command line # # Some of these are redundant, even contradictory. Just trying # to give maximum control, just don't shoot yourself in the foot # by passing conflicting values # my %Vars = qw( title Title xlabel X ylabel Y zlabel Z imgfile /tmp/hm.n.png gamma 1.0 redder 1.0 colorscheme rainbow.colors icolor 0 ncolors 0 zlevels 0 zscale 1 zadd 0 ztrim 0 unset_val 7e-13 zcmin 7e-13 zcmax 7e-13 black 1 sum 0 xmin 7e-13 ymin 7e-13 zmin 7e-13 xmax 7e-13 ymax 7e-13 zmax 7e-13 xres 0 yres 0 zres 0 maxres 1024 nxtics 20 nytics 20 xlog 0 ylog 0 zlog 0 frame T contour 0 cocolor black colabels T colinetype solid colinewidth 1 grid 0 grcolor black grlinetype solid grlinewidth 1 xindex 1 yindex 2 zindex 3 header F height 800 width 1024 sep \t ); sub v(@) { return unless ($::opt_v); # print STDERR "caller: ", join(' ', (caller(0))[0..3]), "\n"; # Escape % format specifiers when single arg passed # (make this dummy-proof against a common pilot error) if (@_ == 1) { print STDERR $_[0]; } else { printf STDERR @_; } } # # support abbreviations: ti -> title etc. # sub abbrev2full($) { my $key = $_[0]; foreach my $fullword (keys %Vars) { if (index($fullword, $key) == 0) { return $fullword; } } ''; } sub which_colorscheme($) { my $key = $_[0]; foreach my $fullword (keys %ColorScheme) { if (index($fullword, $key) == 0) { return $fullword; } } printf STDERR "$0: %s isn't a supported color scheme: defaulting to %s\n", $key, $DefaultColorScheme; $DefaultColorScheme; } sub usage(@) { print STDERR @_, "\n" if (@_); my @varnames = sort keys %Vars; my $cslist = ''; for my $cs (keys %ColorScheme) { $cslist .= sprintf("\t%s\t%s\n", $cs, $ColorScheme{$cs}); } die "Usage: $0 [options] [varname=value]... [idx..] DataFile [ImgFile] Options: -v verbose -D don't display the image -S Sum z-values (instead of averaging them) -s Use as input column separator -i Pass to display program (default $DisplayProg) Supported Varnames (prefix-abbreviations work too): @varnames idx Numeric (1st=1) indices of (x y z) columns in DataFile x,y,z Numeric Indexes (as above) as one argument DataFile must be a CSV/TSV file with numeric columns typically there will be 3 columns (x, y, z) but you can have more (or less) and pick any 3 columns using [idx...] numeric arguments Supported ColorSchemes (prefix-abbreviations work too):\n$cslist "; } # # Make a best-guess what the column separator is given a data file # sub guess_column_separator($;@) { my ($file, @seps_to_check) = @_; return $opt_s if ((defined $opt_s) && length($opt_s)); unless (@seps_to_check) { @seps_to_check = ("\t", ',', ':', ';'); # default list } open(FS, $file) || die "$0: $file: $!\n"; my $line = ; close FS; die "$0: $file: no data - giving up\n" unless (defined $line); foreach my $sep (@seps_to_check) { if (index($line, $sep) >= 0) { v("%s: %s: assuming separator='%s'\n", $::0, $file, $sep); return $sep; } } # Could simply be a one-column file, which is OK. v("%s: %s: can't determine column separator, assuming tab\n", $::0, $file); "\t"; } my $NumPat = qr{^-?(?:\d+\.?|\d*(?:\.\d+)?)(?:e-?\d+)?$}io; sub is_numeric { my $num = shift; ($num ne '' && $num =~ $NumPat) ? 1 : 0; } sub has_header($$) { my ($file, $sep) = @_; open(FSH, $file) || die "$0: $file: $!\n"; my $line1 = ; my $line2 = ; close FSH; chomp($line1); chomp($line2); my @isN1 = map { is_numeric($_) } split($sep, $line1); my @isN2 = map { is_numeric($_) } split($sep, $line2); v("sep=$sep \@isN1=(@isN1) \@isN2=(@isN2)\n"); for (my $i = 0; $i < $#isN1; $i++) { return 1 if ($isN1[$i] != $isN2[$i]); } 0; } # # R code to generate heatmap chart. # Parameters have a {varname} form, and are substituted # before sending this as a string to R # # Obviously, I'm a total R newbie. # If you can make this simpler please send me a patch. # The goal of this whole exericise was to be able to generate # images from data in a snap, without having to learn R first... # my $Rcode = <<'RcodeEOF'; # -- handy funcs printf <- function(...) cat(sprintf(...)) eprintf <- function(...) cat(sprintf(...), file=stderr()) # # My heatmap R-code # rainbow.colors <- function(n) { # Return a rainbox colors color-map (vector of colors) # Need to clip R 'rainbow' range at 11/15 to avoid red wraparound colormap0 <- rev(rainbow(n, s=1, v=1, start=0, end=11/15, gamma={gamma})) # black should be supported on other schemes too... # (it is not documented for this reason.) if ({black}) { # add black + interpolate smooth transition from violet to black color # #00000000 -> #6600FFFF # color choice is not optimal, by any means colormap <- c('black', '#330066FF', '#660099FF', '#9900CCFF', colormap0) } else { colormap <- colormap0 } # invert color order if ({icolor}) colormap <- rev(colormap); colormap } brainbow.colors <- function(n) { # trim the upper violet spectrum, start with deep blue colormap <- rev(rainbow(n, s=1, v=1, start=0, end=10/15, gamma={gamma})) # invert color order if ({icolor}) colormap <- rev(colormap); colormap } ycrainbow.colors <- function(n) { # Center on yellow (1/6) by splitting into 2 disproportinate "halves" # and concatenating them n <- n/2 # -- red to yellow # cmap0 <- rev(rainbow(n, s=1, v=1, start=0, end=1/6.2, gamma={gamma})) # Darken bottom red section to compensate for the much smaller range dark.red.yellow = c('#99000099', '#FF0000FF', '#FFEE00FF') # Can we add alpha and gamma to colorRampPalette? cmap0 = rev(colorRampPalette(dark.red.yellow, space='Lab', bias=0.9)(n)) # -- yellow to blue cmap1 <- rev(rainbow(n, s=1, v=1, start=1/5.8, end=10/15, gamma={gamma})) colormap <- c(cmap1, cmap0) # invert color order if ({icolor}) colormap <- rev(colormap); colormap } danger.colors <- function(n) { # Return a rainbox slice colors from red to green, reversed colormap = rev(rainbow(n, s=1, v=1, start=0, end=1/3, gamma={gamma})) # invert color order if ({icolor}) colormap <- rev(colormap); colormap } # # This routine calculates two parameters: zadd, zscale # these two parameters determine our color-range (Z) # using zadd/zscale one can translate (move) and scale (stretch) # the color spectrum as seem by the user. # # For convenience: since it is much easier/natural for end-users # to specify a minimum and a maximum (zcmmin, zcmax) for the color range # rather than doing the math/algebra of calculating the (a,b) in # Y = a + bX, we try to do the algebra here. It essentially boils # down to solving a system of 2 linear equations with 2 unknowns. # unset_val = {unset_val} zadd_zscale <- function( zmin,zmax,zcmin=unset_val,zcmax=unset_val,zadd=0,zscale=1) { if (zcmin != unset_val || zcmax != unset_val) { # User had changed zadd/zscale defaults if (zadd != 0 || zscale != 1) { # user changed (zcmin,zcmax) as well! eprintf("Warning: Both zadd/zscale AND zcmin/zcmax modified:\n") eprintf("Using zcmin/zcmax & ignoring zadd/zscale setting\n") } if (zcmin == unset_val) zcmin = zmin if (zcmax == unset_val) zcmax = zmax # # Do the algebra for the user # (solve 2 linear equations in 2 unknowns) # calculate (zadd, zscale) from (zcmin, zcmax) # zscale = (zcmax - zcmin) / (zmax - zmin) zadd = zcmax - (zscale * zmax) } c(zadd, zscale) } ariel_heatmap <- function(datafile, ...) { dat <- read.table( datafile, sep="{sep}", header={header}, stringsAsFactors=T, fill=F, blank.lines.skip=T ) x <- dat[[{xindex}]] y <- dat[[{yindex}]] z <- dat[[{zindex}]] logzbase = {zlog} zlabel = "{zlabel}" # Can force min/max (X,Y,Z) from command-line xmin = ifelse({xmin} != unset_val, {xmin}, min(x, na.rm=T)) xmax = ifelse({xmax} != unset_val, {xmax}, max(x, na.rm=T)) xrange = xmax - xmin ymin = ifelse({ymin} != unset_val, {ymin}, min(y, na.rm=T)) ymax = ifelse({ymax} != unset_val, {ymax}, max(y, na.rm=T)) yrange = ymax - ymin # Range clipping: # If user requested any range clipping, remove any out-of-range # value from all 3 vectors if ({xmin} || {xmax} || {ymin} || {ymax}) { inrange.idxs = (xmin <= x) & (x <= xmax) & (ymin <= y) & (y <= ymax) x <- x[inrange.idxs] y <- y[inrange.idxs] z <- z[inrange.idxs] } # # Z-range: clamping or trimming # By default, we prefer clamping of extremes into (zmin, zmax) # range to trimming (removal) of these points because the latter # leaves unsightly 'holes' in the terrain. # If you prefer trimming, use ztrim=1 # zmin = ifelse({zmin} != unset_val, {zmin}, min(z, na.rm=T)) zmax = ifelse({zmax} != unset_val, {zmax}, max(z, na.rm=T)) ztrim = {ztrim} if (! ztrim) { # eprintf("Clamping: ztrim=%d\n", ztrim) if (zmin) { outrange.idxs = (zmin > z) z[outrange.idxs] = zmin } if (zmax) { outrange.idxs = (zmax < z) z[outrange.idxs] = zmax } } # # Calculate the zmin/max after the X/Y range trimming # and the X-range clamping because we may have lost some Z values # if (logzbase) { if (zmin < 0) { eprintf( "zmin=%g: Z values must be positive for log(), forcing to 0.001\n", zmin); zmin=0.001 } if (zmin > 0) { zmin = log(zmin, base=logzbase) zmax = log(zmax, base=logzbase) if (zlabel == 'Z') { zlabel = sprintf('log %g (Z)', logzbase) } } else { eprintf( "zmin=%g: Z values must be positive for log(), ignoring zlog.", zmin); # force logzbase back to 0 logzbase = 0 } } # Now we can finally calculate what our actual data Z-range is zrange = zmax - zmin # eprintf("zmin=%g zmax=%g zrange=%g\n", zmin, zmax, zrange) # Resolution (total number of 'tics' along (X,Y,Z) axes) # Adding 1 to make sure we're never 0 (some edge cases # resulting in a blank image). # Note: big res affect R rendering time dramatically. # For huge ranges (e.g. 1 million by 1 million by 1 million) # the time for R to render the image can grow to many hours. # Ideally we don't really need visual resolutions beyond about # 1000 points per dimension so we could just clip it at {maxres} # Bug: clipping this seems to clip the upper end of zlog=... # when zlog != 0 xres = ifelse({xres}, {xres}, 1+min(floor(xrange), {maxres})) yres = ifelse({yres}, {yres}, 1+min(floor(yrange), {maxres})) zres = ifelse({zres}, {zres}, 1+min(floor(zrange), {maxres})) # sprintf("xmin=%g xmax=%g", xmin, xmax) # sprintf("ymin=%g ymax=%g", ymin, ymax) # sprintf("zmin=%g zmax=%g", zmin, zmax) # make sure all the (dense) matrix is initialized to 0 zmatrix <- matrix(nrow=xres, ncol=yres, 0) zmatrix.count <- matrix(nrow=xres, ncol=yres, 0) # convert matrix from sparse->dense l = length(x) for (i in 1:l) { xval = x[i] yval = y[i] xidx = 1 + round((xval - xmin) / xrange * (xres-1)) yidx = 1 + round((yval - ymin) / yrange * (yres-1)) if (logzbase) { # trying to take a log of a negative number # will cause R to throw an exception zval = log(z[i], base=logzbase); } else { zval = z[i] } # sum of values zmatrix[xidx, yidx] <- zmatrix[xidx, yidx] + zval # count of values zmatrix.count[xidx, yidx] <- zmatrix.count[xidx, yidx] + 1 } # If we are not in sum mode, average zmatrix points based on counts if (! {sum}) { for (i in 1:xres) { for (j in 1:yres) { ijcount = zmatrix.count[i,j] if (ijcount > 1) { # We don't need to divide if: # ijcount is 1 (won't change result) # ijcount is 0 (avoid div-by-0) zmatrix[i,j] <- zmatrix[i,j] / ijcount } } } } xtics = seq(xmin, xmax, len=nrow(zmatrix)) ytics = seq(ymin, ymax, len=ncol(zmatrix)) ztics = seq(zmin, zmax, len={zlevels}) if (any(grep('.e?ps$', '{imgfile}'))) { postscript(file='{imgfile}', width={width}, height={height}) # help(postscript) for more options } if (any(grep('.png$', '{imgfile}'))) { png(file='{imgfile}', width={width}, height={height}) } if (any(grep('.jpe?g$', '{imgfile}'))) { jpeg(file='{imgfile}', width={width}, height={height}) } # xlim.val = range(x, finite=TRUE) # ylim.val = range(y, finite=TRUE) # zlim.val = range(zmatrix, finite=TRUE) ncolors = ifelse({ncolors}, {ncolors}, floor(zres)) # From Gilbert Leung # redder > 1 for more red, < 1 for more violet # may try to use * (power) instead of ^ for gentler redder zlevels = zmin + seq(0, 1, 1/ncolors)^{redder} * zrange zcmin = {zcmin} zcmax = {zcmax} zscale = {zscale} zadd = {zadd} zas = zadd_zscale(zmin,zmax,zcmin=zcmin,zcmax=zcmax,zadd=zadd,zscale=zscale) zadd = zas[1] zscale = zas[2] color_scheme = {colorscheme}(ncolors) # -- generate the main image filled.contour( x = xtics, y = ytics, z = zmatrix, ## -- Implied from (x,y,z) ranges so no need to pass them ## xlim = xlim.val, ## ylim = ylim.val, ## zlim = zlim.val, ## col = color.palette(length(levels) - 1), col = color_scheme, ## -- Implied from the color palette so no need to pass them? # levels = pretty(zlim.val, zres), # Note: you should use either level OR nlevels levels = zadd+zlevels*zscale, # Almost always ruins everything: # nlevels = zres, font.lab=2, font.axis=2, main='{title}', xlab='{xlabel}', ylab='{ylabel}', key.title=zlabel, # Axes around the main image, plus optional grid and contour plot.axes = { axis(1, pretty(xtics, n={nxtics})) axis(2, pretty(ytics, n={nytics})) # should support more contour/grid resolutions/line-types if ({contour}) { contour(xtics, ytics, zmatrix, col="{cocolor}", drawlabels={colabels}, lty="{colinetype}", lwd="{colinewidth}", labcex=1, vfont=c("sans serif","bold"), method="edge", add=T, ) } if ({grid}) { grid( col="{grcolor}", lwd="{grlinewidth}", lty="{grlinetype}", ) } }, # frame.plot = {frame}, # axes = F, # only affects the Z axis? # -- more stuff we can pass: # plot.title, key.axes, # asp = NA, xaxs = "i", yaxs = "i", las = 1, # xes = TRUE, ... ) mtext(zlabel, side=4, line=-5.5, font=2) } RcodeEOF sub prefix_file($$) { my ($f, $prefix) = @_; my ($dir, $basename) = ($f =~ m{^(.*/)?([^/]+)$}); $dir = '' unless (defined $dir); sprintf '%s%s%s', $dir, $prefix, $basename; } # verifies that we have prereqs installed (in PATH) # otherwise, refuses to run sub check_prereqs() { my @pathdirs = split(':', $ENV{PATH}); my %found = (); my @prereqs = ('R', $DisplayProg); foreach my $dir (@pathdirs) { foreach my $exe (@prereqs) { my $full_path = sprintf('%s/%s', $dir, $exe); if (-x $full_path) { $found{$exe} = 1; } } } foreach my $exe (@prereqs) { unless (exists $found{$exe}) { die "$0: sorry, couldn't find '$exe' in \$PATH: please install it\n"; } } } sub init() { $0 =~ s,.*/,,; check_prereqs(); getopts('Di:Ss:bd:Vv') || usage(); # X/Y/Z resolution (via options) $Vars{sum} = $opt_S ? 1 : 0; $Vars{black} = $opt_b ? 1 : 0; $Rverbosity = $opt_V ? '--verbose' : $opt_v ? '--silent' : '--slave --silent'; my ($xindex, $yindex, $zindex); foreach my $arg (@ARGV) { if (-f $arg) { if (exists $Vars{datafile}) { $Vars{imgfile} = $arg; } else { $Vars{datafile} = $arg; my $imgfile = "$arg.a.png"; $imgfile =~ s{.*/}{/tmp/}; $Vars{imgfile} = $imgfile; } } elsif ($arg =~ /=/) { my ($k, $v) = ($`, $'); my $key = abbrev2full($k) || $k; if ($key eq 'colorscheme') { $v = which_colorscheme($v); } $Vars{$key} = $v; } elsif ($arg =~ /^(\d+),(\d+),(\d+)$/) { ($xindex, $yindex, $zindex) = ($1, $2, $3); # print STDERR "xindex=$xindex yindex+$yindex zindex=$zindex\n"; } elsif ($arg =~ /^\d$/) { # an index, convert to R (base 1) and continue unless (defined $xindex) { $xindex = $arg; next; } unless (defined $yindex) { $yindex = $arg; next; } unless (defined $zindex) { $zindex = $arg; next; } usage("Too many index args (expecting max of 3: x, y, z)"); } else { # file doesn't exist, assume output name $Vars{imgfile} = $arg; } } $Vars{xindex} = (defined $xindex) ? $xindex : 1; $Vars{yindex} = (defined $yindex) ? $yindex : 2; $Vars{zindex} = (defined $zindex) ? $zindex : 3; # print STDERR "xindex=$xindex yindex=$yindex zindex=$zindex\n"; if ($opt_v) { foreach my $varname (sort keys %Vars) { printf STDERR "\$Vars{%s}=%s\n", $varname, $Vars{$varname}; } } usage("Must supply a datafile as input") unless (exists $Vars{datafile}); $Vars{sep} = guess_column_separator($Vars{datafile}); $Vars{header} = has_header($Vars{datafile}, $Vars{sep}) ? 'T' : 'F'; v("Vars{header} = %s\n", $Vars{header}); # zlog=1 sets to logbase=1 which doesn't make sense, foolproof this: $Vars{zlog} = 1.1 if ($Vars{zlog} == 1); } sub examine_data($$$$) { my ($datafile, $idx_ref, $sep, $has_header) = @_; my @idx = @$idx_ref; open(DF, $datafile) || die "%s: %s: %s\n", $0, $datafile, "$!"; if ($has_header); my $errors = 0; my @vn = qw(X Y Z); while () { chomp; my (@xyz) = (split($sep))[@idx]; for (my $i = 0; $i <= $#vn; $i++) { my $name = $vn[$i]; my $val = $xyz[$i]; unless (is_numeric($val)) { printf STDERR "\t%s: line %u: %s='%s' isn't numeric!\n", $datafile, $., $name, $val; $errors++; } } } close DF; $errors; } sub doplot() { while (my ($name, $value) = each %Vars) { $Rcode =~ s/{$name}/$value/gs; } open(RIN, "|R --no-save --no-environ --no-init-file $Rverbosity"); print RIN $Rcode; print RIN "ariel_heatmap('$Vars{datafile}')\n"; close RIN; if ($?) { print STDERR "$0: R failed: this is often a result of bad or non-numeric data\n", "=== Examining your data...\n"; my $errors = examine_data( $Vars{datafile}, [$Vars{xindex}-1, $Vars{yindex}-1, $Vars{zindex}-1], $Vars{sep}, $Vars{header} ); unless ($errors) { print STDERR "$0: done. hm wasn't able to find any problem in the data.\n", "Possibilities:\n", "1) The command line parameters you passed don't make sense.\n", "2) There's still a bug lurking somewhere.\n", "If you believe you've actually found a bug, please report\n", "[Attach both your data and \@ARGV below]\n", "ARGV=(@ARGV)\n"; } exit 1; } $Vars{imgfile} } sub display($) { my $imgfile = $_[0]; die "$0: display($imgfile): $! - must be a bug\n" unless (-e $imgfile); if ($opt_D) { # User requested no display, # Be friendly, give a hint that everything is OK printf STDERR "image file is: %s\n", $imgfile if (-t STDERR); return; } # Postscript files are generated in portrait: need 90-degrees rotation my $rotate = ($imgfile =~ /ps$/) ? '-rotate 90' : ''; $opt_i = '' unless (defined $opt_i); system("$DisplayProg $opt_i $rotate $imgfile"); } # -- main init(); my $img = doplot(); display($img); __END__ =head1 NAME: hm - a friendly 3D heatmap data visualization tool =head1 USAGE: hm [options] [var=settings...] [XYZ-column-indices...] datafile [imagefile] For an up-to-date, and more detailed usage, including listing all supported options, and variables, just call hm with no arguments. =head1 DESCRIPTION: hm (heatmap) generates a nice colored heatmap from CSV/TSV files. You give it a data file (CSV/TSV) with 3 or more numeric columns and it displays an image showing your data in a more understandable way where the 1st data column is mapped to the X-axis, the 2nd data column, to the Y-axis, and the 3rd data column to a color. B =over 2 =item * setting any parameter from the command-line =item * support any non-ambiguous abbreviation of a parameter name =item * mapping any data-column to any image axis (X, Y, Z) (handy if you have, say, more than 3 columns, or you want to remap columns to different axes) =item * double mapping a data column to two or more axes (handy in case you have less than 3 columns) =item * auto-detection of the column separator (CSV, TSV), more requests? =item * auto-detection of header in TSV/CSV =item * increasing or decreasing X, Y, Z resolution =item * range clipping into min/max values on any axis =item * setting image size: width, height =item * color control: 1) number of distinct colors, 2) gamma value =item * Adding contour lines and/or a rectangular grid =item * several color-schemes for the Z dimension at the time of this writing the supported schemes are: 'rainbow', 'brainbow', 'danger', 'topo', 'terrain', 'gray', 'cm'. The default is my personal favorite: rainbow. =item * linear transformations and log transformations on the Z-axis: stretching, shrinking, displacing. =item * Inspecting your data and telling you exactly where (file, line, column, value) it found a problem. This is something that R cryptic errors are pretty unhelpful about. =back =head1 OPTIONS & VAR SETTINGS: These keep changing, so for the sake of consistency and accuracy just call hm with no arguments and it will detail all of them. The only thing to know is that the syntax for options is -opt or -opt and the syntax for a variable setting is varname=value. Also abbreviations are supported for all var-names and color-schemes, as long as they are unambiguous. Any integer (1 or greater) passed on the command line after the options, will be interpreted as a data column index. You may map a single data column to all 3 axes (X, Y, Z) by passing, say: hm 1 1 1 datafile or its equivalent: hm 1,1,1 datafile Of course, that would be not very useful for understanding your data. The default is as expected: hm 1,2,3 =head1 PREREQUISITES: hm uses two free software packages underneath: 1) R 2) ImageMagick (the 'display' utility) hm will refuse to run if it can't find 'R' and 'display' in your $PATH If you're on Ubuntu/Debian all you need is to install two packages (if you don't have them already): $ sudo apt-get install r-recommended imagemagick On RedHat/Fedora it should be something like (not tested): $ sudo yum -y install R ImageMagick In all modesty: hm is little more than a friendly layer above the above two tools. Almost all the heavy lifting is done by R and display. All I was trying to achieve here is to be able to go quickly from a data file, to a picture that is worth a 1000 words, without having to learn a new language (like R), or having to convert my data to some obscure format before some free visualization tool can even load it. In short, after I tried several tools and was quite disappointed with the learning curves, I gave up, and wrote my own tool, only without reinventing the parts that were already available. =head1 EXAMPLES: For all these examples, you would need to download the (X, Y, Z) representation of the R volcano data-set. Get it from: http://finance.yendor.com/hm/volcano.tsv Put volcano.tsv file in the current directory and try: # A first look at the data: does it make sense to you? $ less volcano.tsv # Now try this, see the difference? $ hm volcano.tsv # Make the image really big: $ hm width=1500 height=1000 volcano.tsv # Down-quantize colors: use only 5 colors in color space: $ hm ncolors=5 volcano.tsv # Change the title (note the quote escape): $ hm title="Ariel\'s Pic" volcano.tsv # Change Z axis to log (base=1.1) of Z, set the Z label accordingly # increase number of colors to compensate for smaller range resolution $ hm zlog=1.1 zlabel='log 1.1 (height in meters)' ncol=40 volcano.tsv # Use the 'danger' (red .. green) color scheme $ hm colorscheme=danger volcano.tsv # Red-shift the rainbow color range (so it becomes redder) $ hm redder=3.0 volcano.tsv # Range clipping, look at the caldera only: $ hm xmin=7 xmax=50 ymin=12 ymax=56 volcano.tsv # Red-unshift the rainbow color range (so it becomes more violet) $ hm redder=0.3 volcano.tsv # Add a white grid $ hm grid=1 grcolor=white volcano.tsv # Add orange contour lines with 2-pixel line-width $ hm contour=1 cocolor=orange colinewidth=2 volcano.tsv # Add black contour lines and remove the contour labels $ hm contour=1 cocolor=black colabels=0 volcano.tsv =head1 TIPS: =over 2 =item * If you get only violet colors (no blue, green, yellow, red) it is possible that your data is dominated by low values and the very few high values are barely visible. This is common if for example your data is power-law distributed. In this case zlog=1.1 (or similar) is often the best solution. Another way to solve this is to limit your data to certain ranges using range clipping: for example, xmin=5 will eliminate all data triplets where (x < 5). You can range clip on any combination of (xmin, xmax, ymin, ymax, zmin, zmax) The last, and possibly the best way to get your Z-values into the optimal color range, without affecting any of the of the other (X,Y) dimensions and data-points is to apply a linear transformation: newZ = A + (B * Z) Solely to the Z (color) range, this can be done in two different ways: 1) Directly: using the two parameters: 'zadd' and 'zscale' where 'zadd' is 'A', and 'zscale' is 'B' in the linear transformation above. 2) Indirectly (and in a more user friendly way): by setting the lower/upper color bounds parameters: (zcmin, zcmax) if you set either of (zcmin, zcmax), hm will automatically do the math and calculate the (zadd, zscale) from them. For example: if you want to push your colors down to violet in the rainbow-spectrum, you may add an artificial 'dummy' layer on top of your data (this layer will be pushing all colors down towards violet). To do this set 'zcmax' to a value _higher_ than your actual maximum Z value. Alternatively, if you want to push your overall rainbow-color spectrum towards more red, simply set 'zcmin' to a value that is _lower_ than your actual minimum Z value. Here for example is the volcano data set with almost no red at the peak: $ hm zcmax=220 volcano.tsv Note that the parameter 'redder' is different: it introduces a non linear (logarithmic) transform on the color distribution 'redder', unlike (zcmin, zcmax) will make the color range non-uniform. =item * When limiting Z-range via zmin/zmax, hm will, by default, _clamp_ into the Z-range. If you want actual trimming (removal of the out of Z-range points) - use 'ztrim=1'. The reason clamping is the default is that trimming may leave unsightly 'holes' in the terrain. OTOH: ztrim=1 (trimming) is very useful for detecting outliers. Note that, zcmin/zcmax only affect scaling and streching of the Z range colors for color-mapping and displaying, they don't affect the values of the data points themselves. =item * If your data is big and hm takes too long to run, this is usually caused by image rendering. Reducing the resolution on any dimension with a wide range, e.g.: zres=256, can speed up rendering time To avoid pathological cases, hm limits resolutions on all 3 dimensions to 1024 by default. User provided values override this default. =back =head1 BUGS: Some var-settings are redundant and conflicting. You can in fact, shoot yourself in the foot by setting too many variables and leaving no degrees of freedom for the program to determine what it thinks are near optimal values. Some "unable to render" conditions are not detected before calling R on the data. hm could probably do a better job here by doing more checks. One notable example is a too narrow range (a single value) on either the X or Y dimension. If you find other bugs, please report them to the author. =head1 TODO: These are some ideas, not a promise or a plan. The safest way to see them happen is to send the author a patch. * Analyze column distributions to better determine axis resolutions * Support log scales on all axes (Currently only zlog=base is supported) * More control over grid (resolution mostly) * Support arbitrary transformations (user functions) on all axes * Support gaussian blurs on all axes, especially on (x,y) =head1 THANKS: Gilbert Leung and Kostas Tsioutsiouliklis for inspiration, initial feedback and ideas for improvement. Brian Gerard for early feedback that made hm much better. Larry Wall and the perl mongers, for perl. The thousands of contributors to GNU-R. Ross Ihaka for the original volcano data-set. ImageMagick Studio LLC for ImageMagick. Linus Torvalds and the thousands of Linux contributors for my productive development environment. You can have your name added here, just teach me how to do things more efficiently or elegantly with R, add more command line parameters that can be passed into R. etc. Just send me a clean patch (using "diff -u hm.orig hm"). I love patches. =head1 SEE ALSO R(1), ImageMagick: display(1) =head1 LICENSE Released under the GNU General Public Licence (GPL) v3 See http://www.gnu.org/licenses/gpl.html =head1 AUTHOR: Copyright (c) Ariel Faigon, 2007-2008 =head1 HOME PAGE URL: http://finance.yendor.com/hm/ =head1 CONTACT: To get around all spam-filters, you may contact the author via http://finance.yendor.com/contact/ =cut