diff --git a/src/PairPlot.inp b/src/PairPlot.inp index ba84f3b..9b768b6 100644 --- a/src/PairPlot.inp +++ b/src/PairPlot.inp @@ -1,22 +1,9 @@ -function scalar has_min_two_series (const list L) - /* */ - - if nelem(L) < 2 - printf "\nError: List of variables must be comprise at least two series.\n" - return FALSE - endif - - return TRUE -end function - - function void add_information_of_factor (const series factor, bundle *self) /* Gather information of factor series. */ series self.factor_series = factor matrix self.factor_values = values(factor) scalar self.n_factors = nelem(self.factor_values) - # strings self.factor_strings = strvals(factor) bundle b = getinfo(factor) if b.has_string_table @@ -24,40 +11,24 @@ function void add_information_of_factor (const series factor, bundle *self) endif end function - -function string get_plot_type (const bundle self) - /* Retrieve plot type. */ - - string type = (self.use_circles == TRUE) ? "circles" : "points" - - return type -end function - - function scalar PairPlot (const list L "List of variables to plot", const series factor[null] "Discrete series for factorization", bundle opts[null] "Set options through bundle") /* Main public function. return: TRUE on error, otherwise FALSE. */ - bundle self - if exists(opts) - self = opts - endif + bundle self = exists(opts) ? opts : _() self = self + default_options() - - errorif($sysinfo.gui_mode == FALSE && self.filename == "display", - "the 'output=display`-mode is not supported with gretlcli, please use the GUI program instead or store the plot, if possible") + self.n_variables = nelem(L) # Some checks - if nelem(self.filename) == 0 + if self.filename == "" printf "\nError: Invalid file name.\n" return TRUE - endif - if !has_min_two_series(L) + elif self.n_variables < 2 + printf "\nError: List of variables must comprise at least two series.\n" return TRUE endif - self.n_variables = nelem(L) # Adjust fontsize as a function of the number of variables if !exists(opts) || inbundle(opts, "fontsize") == FALSE @@ -80,12 +51,15 @@ function scalar PairPlot (const list L "List of variables to plot", endif # Handle eventual missing values - list ALL = L - ALL += exists(factor) ? factor : null + list ALL = L + if exists(factor) + ALL += factor + endif smpl ALL --no-missing # Set some global(s) - string self.plot_type = get_plot_type(self) + string self.plot_type = self.use_circles ? "circles" : "points" + self.n_plots = get_n_plots(self.type, self.n_variables) if exists(factor) add_information_of_factor(factor, &self) @@ -93,148 +67,62 @@ function scalar PairPlot (const list L "List of variables to plot", scalar self.n_factors = 1 matrix self.factor_values = {1} endif - code = isok_length_pointtype_vector(self) - if !code + if !isok_length_pointtype_vector(self) return TRUE endif - self.filename = exists(filename) ? filename : self.filename - add_filetype(&self) - - string buffer = write_settings(&self) - string buffer += compile_gnuplot_cmd_data_buffer(L, self) - - return send_to_gnuplot(buffer, self.filename) -end function - - -function string type_to_function_map (const string type) - /* Map type to function name. */ - - map =_(\ - triangle = "write_plot_cmd_and_data_tri",\ - matrix = "write_plot_cmd_and_data_mat",\ - row = "write_plot_cmd_and_data_mat",\ - column = "write_plot_cmd_and_data_mat"\ - ) + string filename = self.filename - return map["@type"] + if self.n_plots > 1 + return call_gridplot(L, self, filename) + else + return call_gnuplot(L, self, filename) + endif end function -function string compile_gnuplot_cmd_data_buffer (const list L, - const bundle self) - /* Compile gnuplot commands and data buffer for plotting. */ +function bundle PP_ui_maker (void) + /* set a specification for the first argument for GUI_PairPlot */ - string funcname = type_to_function_map(self.type) - - return feval(funcname, L, self) + bundle b + b.L = _(no_singleton=1, n_const=1) + return b end function - -function void GUI_PairPlot (const list L "List of variables to plot", +function void GUI_PairPlot (const list L "List of series to plot", const series factor[null] "Discrete series for factorization", - int type[1:4:1] "Plot type" {"triangle", "matrix", "row", "column"}) - /* Helper function for GUI access. */ - bundle opts = null - - if type == 1 - string opts.type = "triangle" - elif type == 2 - string opts.type = "matrix" - elif type == 3 - string opts.type = "row" - elif type == 4 - string opts.type = "column" - endif - if exists(factor) - scalar err = PairPlot(L, factor, opts) - else - scalar err = PairPlot(L, null, opts) - endif -end function + int type[1:4:1] "Plot type" {"triangle", "matrix", "row", "column"}, + int centroids[1:3:1] "Centroids" {"none", "mean", "median"}, + scalar transparency[0:1:0:0.1] "Transparency") + /* Callback for PairPlot menu item. */ + bundle opts + strings typestrs = defarray("triangle", "matrix", "row", "column") + strings cstrs = defarray("none", "mean", "median") -function void add_filetype (bundle *self) - /* Determine type for plot file. */ - - if self.filename == "display" - self.filetype = "png" - else - scalar length = strlen(self.filename) - string s = substr(self.filename, (length - 4), length) - string tmp = strstr(s, ".") - string self.filetype = strsub(tmp, ".", "") - endif + opts.type = typestrs[type] + opts.centroid = cstrs[centroids] + opts.transparency_level = 255 * transparency + PairPlot(L, factor, opts) end function - function scalar isok_length_pointtype_vector (const bundle self) - /* Make sure length of 'pointtype' vector is at least as long the number of distinct factor values. - return: TRUE of length of 'pointtype' vector is at least as long the number of distinct factor values; otherwise FALSE*/ - + /* Make sure the 'pointtype' vector has at least as many elements as + there are distinct factor values. Return TRUE if so, FALSE otherwise + */ if inbundle(self, "n_factors") && nelem(self.pointtype) < self.n_factors - printf "\nError: 'factor' series has %d distinct values but you\n\ - have set a vector 'pointtype' with only %d elements. Abort.\n\n",\ - self.n_factors, nelem(self.pointtype) + printf "\nError: 'factor' series has %d distinct values but you\n" + printf "have set a vector 'pointtype' with only %d elements. Abort.\n\n", + self.n_factors, nelem(self.pointtype) return FALSE endif return TRUE end function - - -function scalar plottype_to_rows_map (const bundle self) - /* Determine number of rows of multiplot depending on plot type. */ - - map =_(\ - triangle = self.n_variables - 1, - column = self.n_variables - 1, - matrix = self.n_variables, - row = 1\ - ) - - return map[self.type] -end function - - -function scalar plottype_to_cols_map (const bundle self) - /* Determine number of columns of multiplot depending on plot type. */ - - if self.type != "column" - return self.n_variables - 1 - else - return 1 - endif -end function - - -function string write_settings (bundle *self) - /* Write various settings to a string buffer. */ - - string buffer - buffer += construct_terminal_cmd(self) - - buffer += write_linetype(self.n_variables * self.n_factors, - get_color_definitions(), - self.transparency_level) - - buffer += sprintf("set multiplot layout %d,%d\n", - plottype_to_rows_map(self), plottype_to_cols_map(self)) - - buffer += sprintf("set style line 101 lc rgb '#808080' lt 1 lw 1\n") - # drop right and upper border - buffer += sprintf("set border 3 front ls 101\n") - buffer += write_options(self) - - return buffer -end function - - function bundle default_options (void) /* Default parameter values. */ - bundle self = null + bundle self scalar self.height = 600 scalar self.width = 900 @@ -262,257 +150,155 @@ function bundle default_options (void) return self end function +function string write_pp_plots (const list L, + const bundle self, + const string gp_opts) + /* Common code for the single-plot and multiplot cases. + - In the single case we construct one plot-specification buffer + and pass it back to call_gnuplot() for execution. + - In the multiplot case we construct two or more plot specs and + pass them in turn to gretl's "gnuplot" command, in the context + of a gpbuild block in call_gridplot(). In this case the return + value of @buffer is ignored. + */ -function scalar send_to_gnuplot (const string buffer, - const string filename) - /* Write buffer to temp file and send to gnuplot */ - - string mytemp - outfile --tempfile=mytemp --quiet - print buffer - end outfile - - catch gnuplot --input="@mytemp" --output="@filename" - - return $error -end function - - -function string write_plot_cmd_and_data_mat (const list L, - const bundle self) - /* For case self.type=="matrix" case */ - - string BUFFER = "" + scalar tri = self.type == "triangle" + string buffer = "" + scalar n = nelem(L) + scalar imin = tri ? 2 : 1 + scalar jmax = n + scalar k = 0 set force_decpoint on - loop foreach y L -q # Row dimension - - loop foreach x L -q # Col dimension - if varname(L.$y) != varname(L.$x) - - string buffer = "" - list yx = null - list yx = L.$x L.$y - strings vnames = varnames(yx) - - outfile --buffer=buffer - write_labels(vnames[2], vnames[1], self.fontsize) - factorized_centroids(yx, self) - if y > 2 # show key only for 1st pairplot - printf "set nokey\n" - endif - write_plot_cmd(self) - write_plot_data(yx, self) - end outfile - - BUFFER += buffer + loop i=imin..n + if tri + jmax = i-1 + endif + loop j=1..jmax + if j == i + continue + endif + k++ + buffer = "" + list xy = L[j] L[i] + strings vnames = varnames(xy) + outfile --buffer=buffer + write_options(self, xy, k) + write_labels(vnames, self.fontsize) + if self.centroid != "none" + factorized_centroids(xy, self) + endif + write_plot_cmd(self) + write_plot_data(xy, self) + end outfile + if self.n_plots > 1 # using gridplot + gnuplot --inbuf=buffer @gp_opts endif endloop - - if self.type == "row" || self.type == "column" + if k == self.n_plots break endif endloop set force_decpoint off - BUFFER += sprintf("unset multiplot") - - return BUFFER -end function - - -function string write_plot_cmd_and_data_tri (const list L, - const bundle self) - /* For self.type=="triangle" case */ - - string BUFFER = "" - set force_decpoint on - - # 1) Loop over all possible combinations - # -> Each one starts with a new 'plot' cmd - # 2) For each combination, consider the n_factors - - scalar y_counter = 0 - - loop foreach y L -q # Row dimension - y_counter++ - - scalar x_counter = 0 - - loop foreach x L -q # Column dimension - if y_counter == 1 - break - endif - - x_counter++ - - if varname(L.$y) != varname(L.$x) - string buffer = "" - list yx = null - list yx = L.$x L.$y - strings vnames = varnames(yx) - - if y_counter > x_counter - outfile --buffer=buffer - write_labels(vnames[2], vnames[1], self.fontsize) - factorized_centroids(yx, self) - if y_counter > 2 # show key only for 1st pairplot - printf "set nokey\n" - endif - write_plot_cmd(self) - write_plot_data(yx, self) - end outfile - else - buffer = sprintf("set multiplot next\n") - endif - - BUFFER += buffer - endif - endloop # end loop over x - endloop # end loop over y - set force_decpoint off - - BUFFER += sprintf("unset multiplot") - - return BUFFER + return buffer end function +function void factorized_centroids (const list xy, bundle self) + strings colors = get_color_definitions() -function void factorized_centroids (const list yx, bundle self) - /* - This function calculates and writes the centroids for each factor of a given list of variables. The function loops over the factors, restricts the sample to the observations where the factor series equals the current factor value, and then calls the 'write_centroids' function to calculate and write the centroids. - - Parameters: - yx: A list of variables for which the centroids are to be calculated. - self: A bundle containing various options for the function. It should have the following fields: - - 'n_factors': The number of factors. - - 'factor_series': The series of factors. - - 'factor_values': The values of the factors. - - 'ith_factor': The current factor in the loop. - - The function does not return anything. It modifies the 'self' bundle by setting the 'ith_factor' field to the current factor in the loop. - - Note: This function assumes that the 'write_centroids' function is defined in the same scope. -*/ - loop i=1..self.n_factors -q + loop i=1..self.n_factors smpl full if self.n_factors > 1 smpl self.factor_series == self.factor_values[i] --restrict endif - smpl --no-missing yx + smpl --no-missing xy scalar self.ith_factor = i - write_centroids(yx, self) + write_centroids(xy, self, colors) endloop end function +function void write_centroids (const list xy, const bundle self, + const strings colors) + matrix cxy = zeros(1, 2) + string label = "" -function void write_centroids (const list yx, const bundle self) - /* - This function calculates and writes the centroids for a given list of variables. The type of centroid (mean or median) is determined by the 'centroid' field in the 'self' bundle. If the 'centroid' field is set to 'none', the function does nothing. - - Parameters: - yx: A list of variables for which the centroids are to be calculated. - self: A bundle containing various options for the function. It should have the following fields: - - 'centroid': A string that determines the type of centroid to calculate. It can be 'mean', 'median', or 'none'. - - 'centroid_label': A boolean that determines whether to label the centroids. If true, the centroids are labeled with their type ('Mean' or 'Median'). If false, no label is added. - - 'centroid_pointsize': An integer that determines the point size of the centroids in the plot. - - The function calculates the centroids by looping over the variables in 'yx' and calculating their mean or median, depending on the 'centroid' field in 'self'. The centroids are then written to the plot with a label (if 'centroid_label' is true) and a specified point size ('centroid_pointsize'). The centroids are colored red. - - Note: This function does not return anything. -*/ - if self.centroid != "none" - matrix centroids = mshape(NA, 2, 1) # x,y - - loop loop foreach i yx - if self.centroid == "mean" - centroids[i] = mean(yx.$i) - elif self.centroid == "median" - centroids[i] = median(yx.$i) - endif - endloop - - if self.centroid == "mean" - string label = self.centroid_label == TRUE ? "Mean" : "" - elif self.centroid == "median" - string label = self.centroid_label == TRUE ? "Median" : "" + if self.centroid == "mean" + cxy = {mean(xy[1]), mean(xy[2])} + if self.centroid_label + label = "Mean" endif - - if self.n_factors == 1 - string color = "FF0000" # color code "red" - else - string color = get_color_definitions()[self.ith_factor] + else + cxy = {median(xy[1]), median(xy[2])} + if self.centroid_label + label = "Median" endif - - # Draw some symbol for the i-th centroid - printf "set label %d \"%s\" at %g,%g point ps %g pt %d lw %g", - self.ith_factor, label, centroids[1], centroids[2], - self.centroid_pointsize, self.pointtype[self.ith_factor], - self.centroid_linewidth - # Color of centroid equals the color of the i-th factor - printf " lc rgb '#%s' front\n", color endif -end function + if self.n_factors == 1 # no factors + string color = "FF0000" + else + string color = colors[self.ith_factor] + endif -function void write_labels (const string yname, - const string xname, - const int fontsize) - printf "set ylabel '%s' font ',%d'\n", yname, fontsize - printf "set xlabel '%s' font ',%d'\n", xname, fontsize + # Draw a symbol (optionally with a label) for the i-th centroid + printf "set label %d \"%s\" at %g,%g point ps %g pt %d lw %g", + self.ith_factor, label, cxy[1], cxy[2], self.centroid_pointsize, + self.pointtype[self.ith_factor], self.centroid_linewidth + # Color of centroid equals the color of the i-th factor + printf " lc rgb '#%s' front\n", color end function +function void write_labels (const strings vnames, const int fontsize) + printf "set xlabel '%s' font ',%d'\n", vnames[1], fontsize + printf "set ylabel '%s' font ',%d'\n", vnames[2], fontsize +end function -function void write_plot_data (const list yx, const bundle self) +function void write_plot_data (const list xy, const bundle self) /* Write data for a single subplot. */ - strings S = array(0) - loop i=1..self.n_factors -q smpl full if self.n_factors > 1 smpl self.factor_series == self.factor_values[i] --restrict endif - smpl --no-missing yx + smpl --no-missing xy - matrix m = {yx} # TODO: for huge data, this may become a bottleneck - cnameset(m, "") # avoid printing variable names to the gp-file - printf "%12.8g", m - printf "\ne\n" + matrix m = {xy} # TODO: for huge data, this may become a bottleneck + cnameset(m, "") # avoid printing variable names to the gp-file + printf "%14.8g", m # width 14 to avoid collisions + printf "e\n" endloop end function - function void print_using_cmd (const bundle self, const int idx) /* Print gnuplot data details. */ printf "'-' using 1:2 w %s", self.plot_type if self.plot_type == "points" - printf " pt %d", self.pointtype[idx] + printf " pt %d", self.pointtype[idx] endif end function - -function void print_plot_title (const bundle self, const int idx) - /* Print title string. */ +function void print_factor_title (const bundle self, const int idx) + /* Print title string for a factor. */ if inbundle(self, "factor_str_values") # If factor series comprises strings, these are used printf " title '%s'", self.factor_str_values[idx] else if self.n_factors == 1 - print " title ''" + print " notitle" else printf " title 'F=%d'", self.factor_values[idx] endif endif end function - function void write_plot_cmd (const bundle self) /* Compile gnuplot initial commands. */ @@ -520,7 +306,7 @@ function void write_plot_cmd (const bundle self) loop i=1..self.n_factors print_using_cmd(self, $i) - print_plot_title(self, $i) + print_factor_title(self, $i) if i < self.n_factors printf ", \\\n" @@ -530,200 +316,251 @@ function void write_plot_cmd (const bundle self) endloop end function +function void write_offsets (const scalar offset_level) + /* Write settings for offsets */ -function string set_offsets (const scalar offset_level) - /* Write settings for offsets - return: string, offsets parameters */ - - string r = "set offsets " - - loop i=1..4 -q - r += sprintf("%.2f", offset_level) - r += i < 4 ? ", " : "" + printf "set offsets " + loop i=1..4 + printf "%.2f", offset_level + printf "%s", i < 4 ? ", " : "" endloop - - return r + printf "\n" end function +function void write_key_spec (const bundle self, const list xy, + scalar k) + string pos = self.key_position + scalar do_font = 1 -function string write_options (const bundle self) - /* Write gnuplot options to buffer and return as string. */ - - set force_decpoint on - - string buffer - - outfile --buffer=buffer - printf "%s\n", set_offsets(self.offset_level) - - if self.with_key - printf "set key %s font ',%d'\n", self.key_position, - self.key_fontsize + if self.type == "triangle" && self.n_plots > 2 + /* We can avoid collision between the key and actual data + points by writing the key into a blank region of the grid, + for the first plot only. In this case use the regular + plot font. + */ + if k == 1 + pos = "at screen 0.6, screen 0.95" + do_font = 0 else - printf "set nokey\n" + pos = "" endif - if self.tics - printf "set xtics font ',%d'\n", self.tics_fontsize - printf "set ytics font ',%d' \n", self.tics_fontsize + elif self.type == "matrix" && self.n_plots > 4 + /* Write just one key (?), and try for best position to + avoid collision with data + */ + if k == 1 + if pos == "top left" && corr(xy[1], xy[2]) < -0.2 + pos = "top right" + endif else - printf "set noxtics\n" - printf "set noytics\n" + pos = "" endif + endif - if self.grid - printf "set style line 12 lc rgb '#808080' lt 0 lw 1\n" - printf "set grid ls 12\n" - endif - - printf "set pointsize %g\n", self.pointsize - - # No documented yet, as it errors - /* - if ok(self.y_logscale) - printf "set logscale y %d\n", self.y_logscale - endif - if ok(self.x_logscale) - printf "set logscale x %d\n", self.x_logscale - endif - */ - - end outfile - - set force_decpoint off - - return buffer + if pos == "" + printf "set nokey\n" + elif do_font == 0 + printf "set key %s\n", pos + else + printf "set key %s font ',%d'\n", pos, self.key_fontsize + endif end function +function void write_options (const bundle self, const list xy, + scalar k) + /* Write gnuplot options. */ -function strings get_color_definitions (void) - /* Put your own definitions here. */ + write_offsets(self.offset_level) + if self.with_key + write_key_spec(self, xy, k) + else + printf "set nokey\n" + endif + if self.tics + printf "set xtics font ',%d'\n", self.tics_fontsize + printf "set ytics font ',%d' \n", self.tics_fontsize + else + printf "set noxtics\n" + printf "set noytics\n" + endif + if self.grid + printf "set style line 12 lc rgb '#808080' lt 0 lw 1\n" + printf "set grid ls 12\n" + endif + printf "set pointsize %g\n", self.pointsize + + # Not documented yet, as it errors /* - return defarray(\ - "C41E3A", \ - "4682B4", \ - "FFBF00", \ - "ff0000", \ - "00cc00", \ - "8faab3", \ - "0000ff", \ - "bf25b2", \ - "ffa500"\ - ) + if ok(self.y_logscale) + printf "set logscale y %d\n", self.y_logscale + endif + if ok(self.x_logscale) + printf "set logscale x %d\n", self.x_logscale + endif */ +end function +function strings get_color_definitions (void) # "dark2" palette - return defarray(\ - "1B9E77",\ - "D95F02",\ - "7570B3",\ - "E7298A",\ - "66A61E",\ - "E6AB02",\ - "A6761D",\ - "666666",\ - "C41E3A",\ - "4682B4",\ - "FFBF00",\ - "ff0000",\ - "00cc00",\ - "8faab3",\ - "0000ff",\ - "bf25b2",\ - "ffa500"\ - ) + return defarray("1B9E77", "D95F02", "7570B3", "E7298A", "66A61E", + "E6AB02", "A6761D", "666666", "C41E3A", "4682B4", "FFBF00", "FF0000", + "00CC00", "8FAAB3", "0000FF", "BF25B2", "FFA500") end function +function scalar calculate_pointsize (const scalar default_pointsize, + const int n_variables) + /* Compute optimal fontsizeo as a function of the number + of variables to plot. */ -function string write_linetype (const int n_variables[1::], - const strings color_definitions, - const int transparency_level[0:255:0]) - /* Print linetype commands. */ + scalar pointsize = xmin(5, default_pointsize / sqrt(n_variables)) - string linetype_out = "" - scalar counter = 1 + return pointsize +end function - outfile --buffer=linetype_out - loop i=1..n_variables -q - printf "set linetype %d lc rgb '#%2x%s'\n", - $i, transparency_level, color_definitions[counter] +function scalar calculate_fontsize (const scalar default_fontsize, + const int n_variables) + /* Compute optimal fontsizeo as a function of the number + of variables to plot. */ - # Repeat colors if needed - counter = (counter == nelem(color_definitions)) ? 1 : (counter + 1) - endloop - end outfile + scalar fontsize = xmin(20, default_fontsize / sqrt(n_variables)) - return linetype_out + return ceil(fontsize) end function -function string filetype_to_terminal_map (const string filetype) - /* Return gnuplot terminal name depending on filetype. */ +function scalar calculate_plot_width (const scalar default_width, + const int n_variables) + /* Compute optimal width of plot as a function of the number + of variables to plot. */ - map =_(\ - png = "pngcairo noenhanced",\ - pdf = "pdf noenhanced",\ - eps = "postscript eps color noenhanced",\ - svg ="svg noenhanced"\ - ) + scalar width = xmin(2800, default_width * (1 + sqrt(n_variables)/20)) - return map["@filetype"] + return int(width) end function +function scalar calculate_plot_height (const scalar default_height, + const int n_variables) + /* Compute optimal height of plot as a function of the number + of variables to plot. */ -function string construct_terminal_cmd (const bundle self) - /* Construct the terminal for output. */ + scalar height = xmin(2800, default_height * (1 + sqrt(n_variables)/20)) - set force_decpoint on + return int(height) +end function - string set_cmd = "set terminal " - set_cmd += filetype_to_terminal_map(self.filetype) - set_cmd += sprintf(" font ',%d' ", self.fontsize) - set_cmd += sprintf(" size %g, %g\n", self.width, self.height) +function scalar get_n_plots (const string type, int nvars) + /* Figure the total number of plots required. */ - set force_decpoint off + scalar n1 = nvars - 1 - return set_cmd + if type == "triangle" + return n1 * nvars / 2 + elif type == "matrix" + return n1 * nvars + else # row or column + return n1 + endif end function +function matrix get_layout_matrix (int n) + /* construct a suitable triangular layout matrix */ -function scalar calculate_pointsize (const scalar default_pointsize, - const int n_variables) - /* Compute optimal fontsizeo as a function of the number - of variables to plot. */ + matrix ret = zeros(n, n) + scalar k = 1 + loop i=1..n + loop j=1..i + ret[i,j] = k++ + endloop + endloop + return ret +end function - scalar pointsize = xmin(5, default_pointsize / sqrt(n_variables)) +function string get_gp_opts (const bundle self) + /* construct an options string for gretl's "gnuplot" command */ - return pointsize + string opts = "" + if self.transparency_level > 0 + set force_decpoint on + opts += sprintf("--alpha=%g", self.transparency_level / 255) + set force_decpoint off + endif + # FIXME font size choice? + return opts end function -function scalar calculate_fontsize (const scalar default_fontsize, - const int n_variables) - /* Compute optimal fontsizeo as a function of the number - of variables to plot. */ +function string get_gridplot_opts (const bundle self) + /* construct an options string for the "gridplot" command */ - scalar fontsize = xmin(20, default_fontsize / sqrt(n_variables)) + strings S = array(0) + string opts = "" + + if self.type == "triangle" + S += "--layout=lmat" + elif self.type == "row" + S += "--rows=1" + elif self.type == "column" + S += "--cols=1" + endif + if self.width != 800 + S += sprintf("--width=%d", self.width) + endif + if self.height != 600 + S += sprintf("--height=%d", self.height) + endif + if inbundle(self, "title") + S += sprintf("--title=\"%s\"", self.title) + endif - return ceil(fontsize) + if nelem(S) > 0 + opts = S[1] + endif + loop i=2..nelem(S) + opts += " " + opts += S[i] + endloop + + return opts end function +function scalar call_gridplot (const list L, const bundle self, + const string outspec) + /* Called if there's more than one component plot. */ -function scalar calculate_plot_width (const scalar default_width, - const int n_variables) - /* Compute optimal width of plot as a function of the number - of variables to plot. */ + string gp_opts = get_gp_opts(self) + string grid_opts = get_gridplot_opts(self) + matrix lmat - scalar width = xmin(2800, default_width * (1 + sqrt(n_variables)/20)) + if self.type == "triangle" + lmat = get_layout_matrix(nelem(L) - 1) + endif + + gpbuild plotspecs + write_pp_plots(L, self, gp_opts) + end gpbuild - return width + catch gridplot plotspecs @grid_opts --output="@outspec" + scalar err = $error + if err + printf "Error in call_gridplot: %s\n", errmsg(err) + endif + return err end function -function scalar calculate_plot_height (const scalar default_height, - const int n_variables) - /* Compute optimal height of plot as a function of the number - of variables to plot. */ +function scalar call_gnuplot (const list L, const bundle self, + const string outspec) + /* Called if just a single plot is to be produced. */ - scalar height = xmin(2800, default_height * (1 + sqrt(n_variables)/20)) + string buffer = write_pp_plots(L, self, "") + string gp_opts = get_gp_opts(self) - return height + catch gnuplot --inbuf=buffer @gp_opts --output="@outspec" + scalar err = $error + if err + printf "Error in call_gnuplot: %s\n", errmsg(err) + endif + return err end function + + + diff --git a/src/PairPlot.spec b/src/PairPlot.spec index aba731a..7be1ca2 100644 --- a/src/PairPlot.spec +++ b/src/PairPlot.spec @@ -1,11 +1,12 @@ author = Artur Tarassow email = atecon@posteo.de -version = 0.98 +version = 0.97 date = 2024-04-24 description = Scatterplot matrix with factor separation tags = C88 -min-version = 2022d +min-version = 2024a gui-main = GUI_PairPlot +ui-maker = PP_ui_maker label = Pair Plot menu-attachment = MAINWIN/View/GraphVars public = PairPlot GUI_PairPlot diff --git a/src/PairPlot_help.md b/src/PairPlot_help.md index a43a3ee..bc184f3 100644 --- a/src/PairPlot_help.md +++ b/src/PairPlot_help.md @@ -44,6 +44,7 @@ The user can control the following aspects by adding the respective parameter to - `fontsize`: int, Control the font size of the labels (default: 16) . - `grid`: bool, Draw a grid in the background if TRUE (=1) (default: FALSE) - `height`: scalar, Height of the canvas plot (default: 600). +- `title`: string: set an overall title for the plot(s). - `key`: bool, If the `factor` series is provided, a legend shows the color and point pattern for each distinct value of the `factor` variable. Default: 1 (TRUE). - `key_fontsize` int, Control the font size for the key. Default: 14 - `key_position`: string, Controls the position of the legend in each subplot (use standard gnuplot options). default: "top left". diff --git a/src/PairPlot_sample.inp b/src/PairPlot_sample.inp index 00e3d4f..ab35228 100644 --- a/src/PairPlot_sample.inp +++ b/src/PairPlot_sample.inp @@ -1,56 +1,64 @@ -clear +clear --all set verbose off include PairPlot.gfn -scalar SAMPLE = 1 # Select an example +scalar SAMPLE = 1 # Select an example, 1 to 6 -if SAMPLE == 1 +if SAMPLE == 1 # simple plot of the iris data open iris.gdt --frompkg=PairPlot --quiet list y = 1..4 - PairPlot(y) + eval PairPlot(y) -elif SAMPLE == 2 # factorized scatterplots + centroids +elif SAMPLE == 2 # fancier plot of the iris data + open iris.gdt --quiet + list y = 1..4 + bundle opts = _(transparency_level = 120, + title = "Iris characteristics by variety") + eval PairPlot(y, variety, opts) + +elif SAMPLE == 3 # factorized scatterplots + centroids open credscore.gdt --quiet list y = 1..3 - series factor = Selfempl - + series factor = OwnRent + 1 + stringify(factor, defarray("renter", "owner")) bundle opts = _(transparency_level = 100, - centroid = "mean", - grid = TRUE) + title = "Credit data by home-ownership status", + centroid = "mean", + grid = TRUE) PairPlot(y, factor, opts) -elif SAMPLE == 3 # factorized plus some tweaking +elif SAMPLE == 4 # factorized plus some tweaking open abdata --quiet list y = n k series factor = IND - bundle opts = _(transparency_level = 175, - centroid = "median", - tics = FALSE, - pointsize = 1.5, - centroid_pointsize = 3, - centroid_linewidth = 3, - height = 600, - width = 600) - + centroid = "median", + tics = FALSE, + pointsize = 1.5, + centroid_pointsize = 3, + centroid_linewidth = 3, + height = 600, + width = 600) PairPlot(y, factor, opts) -elif SAMPLE == 4 # factorized scatter + string-valued factor series +elif SAMPLE == 5 # factorized scatter + string-valued factor series open mrw --quiet - list y = gdp60 gdp85 school + list y = gdp60 gdp85 i_y series factor = 1 + OECD - strings svalues = defarray("non-oecd", "oecd") - stringify(factor, svalues) - + stringify(factor, defarray("non-oecd", "oecd")) + # cut out some extreme outliers + smpl nonoil --dummy bundle opts = _(centroid = "mean", - transparency_level = 150, - type = "matrix", - grid = TRUE, - height = 800, - width = 800, - filename = "foo.png") # store locally + transparency_level = 150, + type = "matrix", + grid = TRUE, + height = 800, + width = 900) PairPlot(y, factor, opts) -endif - +elif SAMPLE == 6 # string-valued factor series, using defaults + open data4-10.gdt --quiet + list y = ENROLL CATHOL + PairPlot(y, REGION) +endif diff --git a/tests/.gitignore b/tests/.gitignore new file mode 100644 index 0000000..e33609d --- /dev/null +++ b/tests/.gitignore @@ -0,0 +1 @@ +*.png diff --git a/tests/pngtests.inp b/tests/pngtests.inp new file mode 100644 index 0000000..fe30881 --- /dev/null +++ b/tests/pngtests.inp @@ -0,0 +1,62 @@ +# test script that can be run via gretlcli to produce PNG output +# for all the cases in PairPlot_sample.inp + +clear --all +set verbose off + +include PairPlot.gfn + +open iris.gdt --frompkg=PairPlot --quiet +list y = 1..4 +eval PairPlot(y, null, _(filename = "sample1.png")) + +list y = 1..4 +bundle opts = _(transparency_level = 120, + filename = "sample2.png", + title = "Iris characteristics by variety") +eval PairPlot(y, variety, opts) + +open credscore.gdt --quiet +list y = MDR..Age +series factor = OwnRent + 1 +stringify(factor, defarray("renter", "owner")) +bundle opts = _(transparency_level = 100, + filename = "sample3.png", + title = "Credit data by home-ownership status", + centroid = "mean", + grid = TRUE) +PairPlot(y, factor, opts) + +open abdata --quiet +list y = n k +series factor = IND +bundle opts = _(transparency_level = 175, + filename = "sample4.png", + centroid = "median", + tics = FALSE, + pointsize = 1.5, + centroid_pointsize = 3, + centroid_linewidth = 3, + height = 600, + width = 600) +PairPlot(y, factor, opts) + +open mrw --quiet +list y = gdp60 gdp85 i_y +series factor = 1 + OECD +strings svalues = defarray("non-oecd", "oecd") +stringify(factor, svalues) +# cut out some extreme outliers +smpl nonoil --dummy +bundle opts = _(centroid = "mean", + filename = "sample5.png", + transparency_level = 150, + type = "matrix", + grid = TRUE, + height = 800, + width = 900) +PairPlot(y, factor, opts) + +open data4-10.gdt --quiet +list y = ENROLL CATHOL +PairPlot(y, REGION, _(filename = "sample6.png"))