diff --git a/.gitignore b/.gitignore index fc73b8c..366fde0 100644 --- a/.gitignore +++ b/.gitignore @@ -2,3 +2,4 @@ session.inp *.lyx~ *.gp string_table.txt +*.png diff --git a/README.md b/README.md index a43a3ee..d294327 100644 --- a/README.md +++ b/README.md @@ -44,6 +44,7 @@ The user can control the following aspects by adding the respective parameter to - `fontsize`: int, Control the font size of the labels (default: 16) . - `grid`: bool, Draw a grid in the background if TRUE (=1) (default: FALSE) - `height`: scalar, Height of the canvas plot (default: 600). +- `title`: string: set an overall title for the plot(s). - `key`: bool, If the `factor` series is provided, a legend shows the color and point pattern for each distinct value of the `factor` variable. Default: 1 (TRUE). - `key_fontsize` int, Control the font size for the key. Default: 14 - `key_position`: string, Controls the position of the legend in each subplot (use standard gnuplot options). default: "top left". @@ -65,8 +66,16 @@ The user can control the following aspects by adding the respective parameter to - `use_circles`: bool, Draw circles instead of points if set to 1 (TRUE), default: 0 (FALSE). - `width`: scalar, Width of the canvas plot (default: 900). + # Changelog +* **v0.99 (July 2024)** + * Add new parameter `title` for setting an overall title + * Internal refactoring: Switch to gretl's built-in gridplot apparatus which means that all the graphics file formats supported by gretl can be used. + * It also means that usage via gretlcli in "display" mode works. + * In case the user's specification calls for just a single plot, no "multiplot" is created. + * The graphical interface is enhanced, with more options. + * **v0.98 (April 2024)** * Make width, height, fontsizes and pointsize a function of the number of variables for better readability in case of many variables. * Set parameter `transparency per default to 90. 
diff --git a/run_tests.sh b/run_tests.sh index c022f82..8e63751 100755 --- a/run_tests.sh +++ b/run_tests.sh @@ -2,13 +2,15 @@ set -e DIR=$(dirname $(realpath "$0")) # locate folder where this sh-script is located in -SCRIPT="./tests/run_tests.inp" +SCRIPT_1="./tests/run_tests.inp" +SCRIPT_2="./tests/pngtests.inp" PROJECT="PairPlot" cd $DIR echo "Switched to ${DIR}" -gretlcli -b -e -q ${SCRIPT} +gretlcli -b -e -q ${SCRIPT_1} +gretlcli -b -e -q ${SCRIPT_2} if [ $? -eq 0 ] then diff --git a/src/PairPlot.inp b/src/PairPlot.inp index ba84f3b..09cd6f4 100644 --- a/src/PairPlot.inp +++ b/src/PairPlot.inp @@ -33,6 +33,14 @@ function string get_plot_type (const bundle self) return type end function +function scalar is_valid_filename (const string filename) + if filename == "" + printf "\nError: Invalid file name.\n" + return FALSE + endif + return TRUE +end function + function scalar PairPlot (const list L "List of variables to plot", const series factor[null] "Discrete series for factorization", @@ -40,24 +48,13 @@ function scalar PairPlot (const list L "List of variables to plot", /* Main public function. return: TRUE on error, otherwise FALSE. */ - bundle self - if exists(opts) - self = opts - endif + bundle self = exists(opts) ? 
opts : _() self = self + default_options() + self.n_variables = nelem(L) - errorif($sysinfo.gui_mode == FALSE && self.filename == "display", - "the 'output=display`-mode is not supported with gretlcli, please use the GUI program instead or store the plot, if possible") - - # Some checks - if nelem(self.filename) == 0 - printf "\nError: Invalid file name.\n" + if !is_valid_filename(self.filename) || !has_min_two_series(L) return TRUE endif - if !has_min_two_series(L) - return TRUE - endif - self.n_variables = nelem(L) # Adjust fontsize as a function of the number of variables if !exists(opts) || inbundle(opts, "fontsize") == FALSE @@ -81,11 +78,14 @@ function scalar PairPlot (const list L "List of variables to plot", # Handle eventual missing values list ALL = L - ALL += exists(factor) ? factor : null + if exists(factor) + ALL += factor + endif smpl ALL --no-missing # Set some global(s) string self.plot_type = get_plot_type(self) + scalar self.n_plots = get_n_plots(self.type, self.n_variables) if exists(factor) add_information_of_factor(factor, &self) @@ -93,148 +93,153 @@ function scalar PairPlot (const list L "List of variables to plot", scalar self.n_factors = 1 matrix self.factor_values = {1} endif - code = isok_length_pointtype_vector(self) - if !code + + if !isok_length_pointtype_vector(self) return TRUE endif - self.filename = exists(filename) ? filename : self.filename - add_filetype(&self) - - string buffer = write_settings(&self) - string buffer += compile_gnuplot_cmd_data_buffer(L, self) - - return send_to_gnuplot(buffer, self.filename) + if self.n_plots > 1 + return call_gridplot(L, self) + else + return call_gnuplot(L, self) + endif end function -function string type_to_function_map (const string type) - /* Map type to function name. 
*/ - - map =_(\ - triangle = "write_plot_cmd_and_data_tri",\ - matrix = "write_plot_cmd_and_data_mat",\ - row = "write_plot_cmd_and_data_mat",\ - column = "write_plot_cmd_and_data_mat"\ - ) +function string get_gridplot_opts (const bundle self) + /* construct an options string for the "gridplot" command */ - return map["@type"] -end function + strings S = array(0) + string opts = "" + + if self.type == "triangle" + S += "--layout=lmat" + elif self.type == "row" + S += "--rows=1" + elif self.type == "column" + S += "--cols=1" + endif + if self.width != 800 + S += sprintf("--width=%d", self.width) + endif + if self.height != 600 + S += sprintf("--height=%d", self.height) + endif + if inbundle(self, "title") + S += sprintf("--title=\"%s\"", self.title) + endif -function string compile_gnuplot_cmd_data_buffer (const list L, - const bundle self) - /* Compile gnuplot commands and data buffer for plotting. */ + if nelem(S) > 0 + opts = S[1] + endif - string funcname = type_to_function_map(self.type) + loop i=2..nelem(S) + opts += " " + opts += S[i] + endloop - return feval(funcname, L, self) + return opts end function +function string get_gp_opts (const bundle self) + /* construct an options string for gretl's "gnuplot" command */ -function void GUI_PairPlot (const list L "List of variables to plot", - const series factor[null] "Discrete series for factorization", - int type[1:4:1] "Plot type" {"triangle", "matrix", "row", "column"}) - /* Helper function for GUI access. 
*/ - bundle opts = null - - if type == 1 - string opts.type = "triangle" - elif type == 2 - string opts.type = "matrix" - elif type == 3 - string opts.type = "row" - elif type == 4 - string opts.type = "column" - endif - if exists(factor) - scalar err = PairPlot(L, factor, opts) - else - scalar err = PairPlot(L, null, opts) + string opts = "" + if self.transparency_level > 0 + set force_decpoint on + opts += sprintf("--alpha=%g", self.transparency_level / 255) + set force_decpoint off endif + # TODO: font size choice? + return opts end function -function void add_filetype (bundle *self) - /* Determine type for plot file. */ +function scalar call_gridplot (const list L, const bundle self) + /* Called if there's more than one component plot. */ - if self.filename == "display" - self.filetype = "png" - else - scalar length = strlen(self.filename) - string s = substr(self.filename, (length - 4), length) - string tmp = strstr(s, ".") - string self.filetype = strsub(tmp, ".", "") - endif -end function + string outspec = self.filename + string gp_opts = get_gp_opts(self) + string grid_opts = get_gridplot_opts(self) + matrix lmat + if self.type == "triangle" + lmat = get_layout_matrix(nelem(L) - 1) + endif -function scalar isok_length_pointtype_vector (const bundle self) - /* Make sure length of 'pointtype' vector is at least as long the number of distinct factor values. - return: TRUE of length of 'pointtype' vector is at least as long the number of distinct factor values; otherwise FALSE*/ + gpbuild plotspecs + write_pp_plots(L, self, gp_opts) + end gpbuild - if inbundle(self, "n_factors") && nelem(self.pointtype) < self.n_factors - printf "\nError: 'factor' series has %d distinct values but you\n\ - have set a vector 'pointtype' with only %d elements. 
Abort.\n\n",\ - self.n_factors, nelem(self.pointtype) - return FALSE + catch gridplot plotspecs @grid_opts --output="@outspec" + scalar err = $error + if err + printf "Error in call_gridplot: %s\n", errmsg(err) endif - - return TRUE + return err end function +function scalar get_n_plots (const string type, int nvars) + /* Figure the total number of plots required. */ -function scalar plottype_to_rows_map (const bundle self) - /* Determine number of rows of multiplot depending on plot type. */ - - map =_(\ - triangle = self.n_variables - 1, - column = self.n_variables - 1, - matrix = self.n_variables, - row = 1\ - ) + scalar n1 = nvars - 1 - return map[self.type] + if type == "triangle" + return n1 * nvars / 2 + elif type == "matrix" + return n1 * nvars + else # row or column + return n1 + endif end function +function bundle PP_ui_maker (void) + /* set a specification for the first argument for GUI_PairPlot */ -function scalar plottype_to_cols_map (const bundle self) - /* Determine number of columns of multiplot depending on plot type. */ - - if self.type != "column" - return self.n_variables - 1 - else - return 1 - endif + bundle b + b.L = _(no_singleton=1, n_const=1) + return b end function -function string write_settings (bundle *self) - /* Write various settings to a string buffer. */ +function void GUI_PairPlot (const list L "List of series to plot", + const series factor[null] "Discrete series for factorization", + int type[1:4:1] "Plot type" {"triangle", "matrix", "row", "column"}, + int centroids[1:3:1] "Centroids" {"none", "mean", "median"}, + scalar transparency[0:1:0:0.1] "Transparency [0-1]") + /* Callback for PairPlot menu item. 
*/ - string buffer - buffer += construct_terminal_cmd(self) + bundle opts + strings typestrs = defarray("triangle", "matrix", "row", "column") + strings cstrs = defarray("none", "mean", "median") - buffer += write_linetype(self.n_variables * self.n_factors, - get_color_definitions(), - self.transparency_level) + opts.type = typestrs[type] + opts.centroid = cstrs[centroids] + opts.transparency_level = 255 * transparency + PairPlot(L, factor, opts) +end function - buffer += sprintf("set multiplot layout %d,%d\n", - plottype_to_rows_map(self), plottype_to_cols_map(self)) - buffer += sprintf("set style line 101 lc rgb '#808080' lt 1 lw 1\n") - # drop right and upper border - buffer += sprintf("set border 3 front ls 101\n") - buffer += write_options(self) +function scalar isok_length_pointtype_vector (const bundle self) + /* Make sure the 'pointtype' vector has at least as many elements as + there are distinct factor values. Return TRUE if so, FALSE otherwise + */ + if inbundle(self, "n_factors") && (nelem(self.pointtype) < self.n_factors) + printf "\nError: 'factor' series has %d distinct values but you\n", self.n_factors + printf "have set a vector 'pointtype' with only %d elements. Abort.\n\n", + nelem(self.pointtype) + return FALSE + endif - return buffer + return TRUE end function function bundle default_options (void) /* Default parameter values. 
*/ - bundle self = null + bundle self scalar self.height = 600 scalar self.width = 900 @@ -263,129 +268,69 @@ function bundle default_options (void) end function -function scalar send_to_gnuplot (const string buffer, - const string filename) - /* Write buffer to temp file and send to gnuplot */ - - string mytemp - outfile --tempfile=mytemp --quiet - print buffer - end outfile - - catch gnuplot --input="@mytemp" --output="@filename" - - return $error -end function - - -function string write_plot_cmd_and_data_mat (const list L, - const bundle self) - /* For case self.type=="matrix" case */ +function string write_pp_plots (const list L, + const bundle self, + const string gp_opts) + /* Common code for the single-plot and multiplot cases. + - In the single case we construct one plot-specification buffer + and pass it back to call_gnuplot() for execution. + - In the multiplot case we construct two or more plot specs and + pass them in turn to gretl's "gnuplot" command, in the context + of a gpbuild block in call_gridplot(). In this case the return + value of @buffer is ignored. + */ - string BUFFER = "" + scalar tri = self.type == "triangle" + string buffer = "" + scalar n = nelem(L) + scalar imin = tri ? 
2 : 1 + scalar jmax = n + scalar k = 0 set force_decpoint on - loop foreach y L -q # Row dimension - - loop foreach x L -q # Col dimension - if varname(L.$y) != varname(L.$x) - - string buffer = "" - list yx = null - list yx = L.$x L.$y - strings vnames = varnames(yx) - - outfile --buffer=buffer - write_labels(vnames[2], vnames[1], self.fontsize) - factorized_centroids(yx, self) - if y > 2 # show key only for 1st pairplot - printf "set nokey\n" - endif - write_plot_cmd(self) - write_plot_data(yx, self) - end outfile - - BUFFER += buffer + loop i=imin..n + if tri + jmax = i-1 + endif + loop j=1..jmax + if j == i + continue + endif + k++ + buffer = "" + list xy = L[j] L[i] + strings vnames = varnames(xy) + outfile --buffer=buffer + write_options(self, xy, k) + write_labels(vnames, self.fontsize) + if self.centroid != "none" + factorized_centroids(xy, self) + endif + write_plot_cmd(self) + write_plot_data(xy, self) + end outfile + if self.n_plots > 1 # using gridplot + gnuplot --inbuf=buffer @gp_opts endif endloop - - if self.type == "row" || self.type == "column" + if k == self.n_plots break endif endloop set force_decpoint off - BUFFER += sprintf("unset multiplot") - - return BUFFER -end function - - -function string write_plot_cmd_and_data_tri (const list L, - const bundle self) - /* For self.type=="triangle" case */ - - string BUFFER = "" - set force_decpoint on - - # 1) Loop over all possible combinations - # -> Each one starts with a new 'plot' cmd - # 2) For each combination, consider the n_factors - - scalar y_counter = 0 - - loop foreach y L -q # Row dimension - y_counter++ - - scalar x_counter = 0 - - loop foreach x L -q # Column dimension - if y_counter == 1 - break - endif - - x_counter++ - - if varname(L.$y) != varname(L.$x) - string buffer = "" - list yx = null - list yx = L.$x L.$y - strings vnames = varnames(yx) - - if y_counter > x_counter - outfile --buffer=buffer - write_labels(vnames[2], vnames[1], self.fontsize) - factorized_centroids(yx, self) - 
if y_counter > 2 # show key only for 1st pairplot - printf "set nokey\n" - endif - write_plot_cmd(self) - write_plot_data(yx, self) - end outfile - else - buffer = sprintf("set multiplot next\n") - endif - - BUFFER += buffer - endif - endloop # end loop over x - endloop # end loop over y - set force_decpoint off - - BUFFER += sprintf("unset multiplot") - - return BUFFER + return buffer end function -function void factorized_centroids (const list yx, bundle self) +function void factorized_centroids (const list xy, bundle self) /* This function calculates and writes the centroids for each factor of a given list of variables. The function loops over the factors, restricts the sample to the observations where the factor series equals the current factor value, and then calls the 'write_centroids' function to calculate and write the centroids. Parameters: - yx: A list of variables for which the centroids are to be calculated. + xy: A list of variables for which the centroids are to be calculated. self: A bundle containing various options for the function. It should have the following fields: - 'n_factors': The number of factors. - 'factor_series': The series of factors. @@ -396,92 +341,86 @@ function void factorized_centroids (const list yx, bundle self) Note: This function assumes that the 'write_centroids' function is defined in the same scope. */ - loop i=1..self.n_factors -q + + strings colors = get_color_definitions() + + loop i=1..self.n_factors smpl full if self.n_factors > 1 smpl self.factor_series == self.factor_values[i] --restrict endif - smpl --no-missing yx + smpl --no-missing xy scalar self.ith_factor = i - write_centroids(yx, self) + write_centroids(xy, self, colors) endloop end function -function void write_centroids (const list yx, const bundle self) +function void write_centroids (const list xy, const bundle self, + const strings colors) /* This function calculates and writes the centroids for a given list of variables. 
The type of centroid (mean or median) is determined by the 'centroid' field in the 'self' bundle. If the 'centroid' field is set to 'none', the function does nothing. Parameters: - yx: A list of variables for which the centroids are to be calculated. + xy: A list of variables for which the centroids are to be calculated. self: A bundle containing various options for the function. It should have the following fields: - 'centroid': A string that determines the type of centroid to calculate. It can be 'mean', 'median', or 'none'. - 'centroid_label': A boolean that determines whether to label the centroids. If true, the centroids are labeled with their type ('Mean' or 'Median'). If false, no label is added. - 'centroid_pointsize': An integer that determines the point size of the centroids in the plot. - - The function calculates the centroids by looping over the variables in 'yx' and calculating their mean or median, depending on the 'centroid' field in 'self'. The centroids are then written to the plot with a label (if 'centroid_label' is true) and a specified point size ('centroid_pointsize'). The centroids are colored red. - - Note: This function does not return anything. */ - if self.centroid != "none" - matrix centroids = mshape(NA, 2, 1) # x,y - - loop loop foreach i yx - if self.centroid == "mean" - centroids[i] = mean(yx.$i) - elif self.centroid == "median" - centroids[i] = median(yx.$i) - endif - endloop - if self.centroid == "mean" - string label = self.centroid_label == TRUE ? "Mean" : "" - elif self.centroid == "median" - string label = self.centroid_label == TRUE ? 
"Median" : "" - endif + matrix cxy = zeros(1, 2) + string label = "" - if self.n_factors == 1 - string color = "FF0000" # color code "red" - else - string color = get_color_definitions()[self.ith_factor] + if self.centroid == "mean" + cxy = {mean(xy[1]), mean(xy[2])} + if self.centroid_label + label = "Mean" endif + else + cxy = {median(xy[1]), median(xy[2])} + if self.centroid_label + label = "Median" + endif + endif - # Draw some symbol for the i-th centroid - printf "set label %d \"%s\" at %g,%g point ps %g pt %d lw %g", - self.ith_factor, label, centroids[1], centroids[2], - self.centroid_pointsize, self.pointtype[self.ith_factor], - self.centroid_linewidth - # Color of centroid equals the color of the i-th factor - printf " lc rgb '#%s' front\n", color + if self.n_factors == 1 # no factors + string color = "FF0000" + else + string color = colors[self.ith_factor] endif + + # Draw a symbol (optionally with a label) for the i-th centroid + printf "set label %d \"%s\" at %g,%g point ps %g pt %d lw %g", + self.ith_factor, label, cxy[1], cxy[2], self.centroid_pointsize, + self.pointtype[self.ith_factor], self.centroid_linewidth + + # Color of centroid equals the color of the i-th factor + printf " lc rgb '#%s' front\n", color end function -function void write_labels (const string yname, - const string xname, - const int fontsize) - printf "set ylabel '%s' font ',%d'\n", yname, fontsize - printf "set xlabel '%s' font ',%d'\n", xname, fontsize +function void write_labels (const strings vnames, const int fontsize) + printf "set xlabel '%s' font ',%d'\n", vnames[1], fontsize + printf "set ylabel '%s' font ',%d'\n", vnames[2], fontsize end function -function void write_plot_data (const list yx, const bundle self) +function void write_plot_data (const list xy, const bundle self) /* Write data for a single subplot. 
*/ - strings S = array(0) - loop i=1..self.n_factors -q smpl full if self.n_factors > 1 smpl self.factor_series == self.factor_values[i] --restrict endif - smpl --no-missing yx + smpl --no-missing xy - matrix m = {yx} # TODO: for huge data, this may become a bottleneck - cnameset(m, "") # avoid printing variable names to the gp-file - printf "%12.8g", m - printf "\ne\n" + matrix m = {xy} # TODO: for huge data, this may become a bottleneck + cnameset(m, "") # avoid printing variable names to the gp-file + printf "%14.8g", m # width 14 to avoid collisions + printf "e\n" endloop end function @@ -492,20 +431,20 @@ function void print_using_cmd (const bundle self, const int idx) printf "'-' using 1:2 w %s", self.plot_type if self.plot_type == "points" - printf " pt %d", self.pointtype[idx] + printf " pt %d", self.pointtype[idx] endif end function -function void print_plot_title (const bundle self, const int idx) - /* Print title string. */ +function void print_factor_title (const bundle self, const int idx) + /* Print title string for a factor. */ if inbundle(self, "factor_str_values") # If factor series comprises strings, these are used printf " title '%s'", self.factor_str_values[idx] else if self.n_factors == 1 - print " title ''" + print " notitle" else printf " title 'F=%d'", self.factor_values[idx] endif @@ -520,7 +459,7 @@ function void write_plot_cmd (const bundle self) loop i=1..self.n_factors print_using_cmd(self, $i) - print_plot_title(self, $i) + print_factor_title(self, $i) if i < self.n_factors printf ", \\\n" @@ -531,67 +470,93 @@ function void write_plot_cmd (const bundle self) end function -function string set_offsets (const scalar offset_level) - /* Write settings for offsets - return: string, offsets parameters */ +function void write_offsets (const scalar offset_level) + /* Write settings for offsets */ - string r = "set offsets " - - loop i=1..4 -q - r += sprintf("%.2f", offset_level) - r += i < 4 ? 
", " : "" + printf "set offsets " + loop i=1..4 + printf "%.2f", offset_level + printf "%s", i < 4 ? ", " : "" endloop - - return r + printf "\n" end function -function string write_options (const bundle self) - /* Write gnuplot options to buffer and return as string. */ +function void write_key_spec (const bundle self, const list xy, + const scalar k) + /* TBA */ - set force_decpoint on + string pos = self.key_position + scalar do_font = 1 - string buffer - - outfile --buffer=buffer - printf "%s\n", set_offsets(self.offset_level) - - if self.with_key - printf "set key %s font ',%d'\n", self.key_position, - self.key_fontsize + if self.type == "triangle" && self.n_plots > 2 + /* We can avoid collision between the key and actual data + points by writing the key into a blank region of the grid, + for the first plot only. In this case use the regular + plot font. + */ + if k == 1 + pos = "at screen 0.6, screen 0.95" + do_font = 0 else - printf "set nokey\n" + pos = "" endif - if self.tics - printf "set xtics font ',%d'\n", self.tics_fontsize - printf "set ytics font ',%d' \n", self.tics_fontsize + elif self.type == "matrix" && self.n_plots > 4 + /* Write just one key (?), and try for best position to + avoid collision with data + */ + if k == 1 + if pos == "top left" && corr(xy[1], xy[2]) < -0.2 + pos = "top right" + endif else - printf "set noxtics\n" - printf "set noytics\n" + pos = "" endif + endif - if self.grid - printf "set style line 12 lc rgb '#808080' lt 0 lw 1\n" - printf "set grid ls 12\n" - endif + if pos == "" + printf "set nokey\n" + elif do_font == 0 + printf "set key %s\n", pos + else + printf "set key %s font ',%d'\n", pos, self.key_fontsize + endif +end function - printf "set pointsize %g\n", self.pointsize - # No documented yet, as it errors - /* - if ok(self.y_logscale) - printf "set logscale y %d\n", self.y_logscale - endif - if ok(self.x_logscale) - printf "set logscale x %d\n", self.x_logscale - endif - */ +function void write_options (const 
bundle self, const list xy, + scalar k) + /* Write gnuplot options. */ - end outfile + write_offsets(self.offset_level) - set force_decpoint off + if self.with_key + write_key_spec(self, xy, k) + else + printf "set nokey\n" + endif + if self.tics + printf "set xtics font ',%d'\n", self.tics_fontsize + printf "set ytics font ',%d' \n", self.tics_fontsize + else + printf "set noxtics\n" + printf "set noytics\n" + endif + if self.grid + printf "set style line 12 lc rgb '#808080' lt 0 lw 1\n" + printf "set grid ls 12\n" + endif + printf "set pointsize %g\n", self.pointsize - return buffer + # Not documented yet, as it errors + /* + if ok(self.y_logscale) + printf "set logscale y %d\n", self.y_logscale + endif + if ok(self.x_logscale) + printf "set logscale x %d\n", self.x_logscale + endif + */ end function @@ -625,71 +590,19 @@ function strings get_color_definitions (void) "C41E3A",\ "4682B4",\ "FFBF00",\ - "ff0000",\ - "00cc00",\ - "8faab3",\ - "0000ff",\ - "bf25b2",\ - "ffa500"\ + "FF0000",\ + "00CC00",\ + "8FAAB3",\ + "0000FF",\ + "BF25B2",\ + "FFA500"\ ) end function -function string write_linetype (const int n_variables[1::], - const strings color_definitions, - const int transparency_level[0:255:0]) - /* Print linetype commands. */ - - string linetype_out = "" - scalar counter = 1 - - outfile --buffer=linetype_out - loop i=1..n_variables -q - printf "set linetype %d lc rgb '#%2x%s'\n", - $i, transparency_level, color_definitions[counter] - - # Repeat colors if needed - counter = (counter == nelem(color_definitions)) ? 1 : (counter + 1) - endloop - end outfile - - return linetype_out -end function - - -function string filetype_to_terminal_map (const string filetype) - /* Return gnuplot terminal name depending on filetype. 
*/ - - map =_(\ - png = "pngcairo noenhanced",\ - pdf = "pdf noenhanced",\ - eps = "postscript eps color noenhanced",\ - svg ="svg noenhanced"\ - ) - - return map["@filetype"] -end function - - -function string construct_terminal_cmd (const bundle self) - /* Construct the terminal for output. */ - - set force_decpoint on - - string set_cmd = "set terminal " - set_cmd += filetype_to_terminal_map(self.filetype) - set_cmd += sprintf(" font ',%d' ", self.fontsize) - set_cmd += sprintf(" size %g, %g\n", self.width, self.height) - - set force_decpoint off - - return set_cmd -end function - - function scalar calculate_pointsize (const scalar default_pointsize, const int n_variables) - /* Compute optimal fontsizeo as a function of the number + /* Compute optimal pointsize as a function of the number of variables to plot. */ scalar pointsize = xmin(5, default_pointsize / sqrt(n_variables)) @@ -699,7 +612,7 @@ end function function scalar calculate_fontsize (const scalar default_fontsize, const int n_variables) - /* Compute optimal fontsizeo as a function of the number + /* Compute optimal fontsize as a function of the number of variables to plot. */ scalar fontsize = xmin(20, default_fontsize / sqrt(n_variables)) @@ -727,3 +640,33 @@ function scalar calculate_plot_height (const scalar default_height, return height end function + + +function matrix get_layout_matrix (int n) + /* construct a suitable triangular layout matrix */ + + matrix ret = zeros(n, n) + scalar k = 1 + loop i=1..n + loop j=1..i + ret[i,j] = k++ + endloop + endloop + return ret +end function + + +function scalar call_gnuplot (const list L, const bundle self) + /* Called if just a single plot is to be produced. 
*/ + + string outspec = self.filename + string buffer = write_pp_plots(L, self, "") + string gp_opts = get_gp_opts(self) + + catch gnuplot --inbuf=buffer @gp_opts --output="@outspec" + scalar err = $error + if err + printf "Error in call_gnuplot: %s\n", errmsg(err) + endif + return err +end function diff --git a/src/PairPlot.spec b/src/PairPlot.spec index aba731a..36a02da 100644 --- a/src/PairPlot.spec +++ b/src/PairPlot.spec @@ -1,11 +1,12 @@ -author = Artur Tarassow +author = Artur Tarassow and Allin Cottrell email = atecon@posteo.de -version = 0.98 -date = 2024-04-24 +version = 0.99 +date = 2024-07-03 description = Scatterplot matrix with factor separation tags = C88 -min-version = 2022d +min-version = 2024a gui-main = GUI_PairPlot +ui-maker = PP_ui_maker label = Pair Plot menu-attachment = MAINWIN/View/GraphVars public = PairPlot GUI_PairPlot diff --git a/src/PairPlot_help.md b/src/PairPlot_help.md index a43a3ee..d294327 100644 --- a/src/PairPlot_help.md +++ b/src/PairPlot_help.md @@ -44,6 +44,7 @@ The user can control the following aspects by adding the respective parameter to - `fontsize`: int, Control the font size of the labels (default: 16) . - `grid`: bool, Draw a grid in the background if TRUE (=1) (default: FALSE) - `height`: scalar, Height of the canvas plot (default: 600). +- `title`: string: set an overall title for the plot(s). - `key`: bool, If the `factor` series is provided, a legend shows the color and point pattern for each distinct value of the `factor` variable. Default: 1 (TRUE). - `key_fontsize` int, Control the font size for the key. Default: 14 - `key_position`: string, Controls the position of the legend in each subplot (use standard gnuplot options). default: "top left". @@ -65,8 +66,16 @@ The user can control the following aspects by adding the respective parameter to - `use_circles`: bool, Draw circles instead of points if set to 1 (TRUE), default: 0 (FALSE). - `width`: scalar, Width of the canvas plot (default: 900). 
+ # Changelog +* **v0.99 (July 2024)** + * Add new parameter `title` for setting an overall title + * Internal refactoring: Switch to gretl's built-in gridplot apparatus which means that all the graphics file formats supported by gretl can be used. + * It also means that usage via gretlcli in "display" mode works. + * In case the user's specification calls for just a single plot, no "multiplot" is created. + * The graphical interface is enhanced, with more options. + * **v0.98 (April 2024)** * Make width, height, fontsizes and pointsize a function of the number of variables for better readability in case of many variables. * Set parameter `transparency per default to 90. diff --git a/src/PairPlot_sample.inp b/src/PairPlot_sample.inp index 00e3d4f..8ddbd49 100644 --- a/src/PairPlot_sample.inp +++ b/src/PairPlot_sample.inp @@ -1,30 +1,37 @@ -clear +clear --all set verbose off include PairPlot.gfn -scalar SAMPLE = 1 # Select an example +scalar SAMPLE = 1 # Select an example, 1 to 6 -if SAMPLE == 1 +if SAMPLE == 1 # simple plot of the iris data open iris.gdt --frompkg=PairPlot --quiet list y = 1..4 PairPlot(y) -elif SAMPLE == 2 # factorized scatterplots + centroids +elif SAMPLE == 2 # fancier plot of the iris data + open iris.gdt --quiet + list y = 1..4 + bundle opts = _(transparency_level = 120, + title = "Iris characteristics by variety") + PairPlot(y, variety, opts) + +elif SAMPLE == 3 # factorized scatterplots + centroids open credscore.gdt --quiet list y = 1..3 - series factor = Selfempl - + series factor = OwnRent + 1 + stringify(factor, defarray("renter", "owner")) bundle opts = _(transparency_level = 100, + title = "Credit data by home-ownership status", centroid = "mean", grid = TRUE) PairPlot(y, factor, opts) -elif SAMPLE == 3 # factorized plus some tweaking +elif SAMPLE == 4 # factorized plus some tweaking open abdata --quiet list y = n k series factor = IND - bundle opts = _(transparency_level = 175, centroid = "median", tics = FALSE, @@ -33,24 +40,27 @@ 
elif SAMPLE == 3 # factorized plus some tweaking centroid_linewidth = 3, height = 600, width = 600) - PairPlot(y, factor, opts) -elif SAMPLE == 4 # factorized scatter + string-valued factor series +elif SAMPLE == 5 # factorized scatter + string-valued factor series open mrw --quiet - list y = gdp60 gdp85 school + list y = gdp60 gdp85 i_y series factor = 1 + OECD - strings svalues = defarray("non-oecd", "oecd") - stringify(factor, svalues) - + stringify(factor, defarray("non-oecd", "oecd")) + # cut out some extreme outliers + smpl nonoil --dummy bundle opts = _(centroid = "mean", transparency_level = 150, type = "matrix", grid = TRUE, height = 800, - width = 800, - filename = "foo.png") # store locally + width = 900, + fontsize = 16, + key_fontsize = 11) PairPlot(y, factor, opts) -endif - +elif SAMPLE == 6 # string-valued factor series, using defaults + open data4-10.gdt --quiet + list y = ENROLL CATHOL + PairPlot(y, REGION) +endif diff --git a/tests/pngtests.inp b/tests/pngtests.inp new file mode 100644 index 0000000..fe30881 --- /dev/null +++ b/tests/pngtests.inp @@ -0,0 +1,62 @@ +# test script that can be run via gretlcli to produce PNG output +# for all the cases in PairPlot_sample.inp + +clear --all +set verbose off + +include PairPlot.gfn + +open iris.gdt --frompkg=PairPlot --quiet +list y = 1..4 +eval PairPlot(y, null, _(filename = "sample1.png")) + +list y = 1..4 +bundle opts = _(transparency_level = 120, + filename = "sample2.png", + title = "Iris characteristics by variety") +eval PairPlot(y, variety, opts) + +open credscore.gdt --quiet +list y = MDR..Age +series factor = OwnRent + 1 +stringify(factor, defarray("renter", "owner")) +bundle opts = _(transparency_level = 100, + filename = "sample3.png", + title = "Credit data by home-ownership status", + centroid = "mean", + grid = TRUE) +PairPlot(y, factor, opts) + +open abdata --quiet +list y = n k +series factor = IND +bundle opts = _(transparency_level = 175, + filename = "sample4.png", + centroid = 
"median", + tics = FALSE, + pointsize = 1.5, + centroid_pointsize = 3, + centroid_linewidth = 3, + height = 600, + width = 600) +PairPlot(y, factor, opts) + +open mrw --quiet +list y = gdp60 gdp85 i_y +series factor = 1 + OECD +strings svalues = defarray("non-oecd", "oecd") +stringify(factor, svalues) +# cut out some extreme outliers +smpl nonoil --dummy +bundle opts = _(centroid = "mean", + filename = "sample5.png", + transparency_level = 150, + type = "matrix", + grid = TRUE, + height = 800, + width = 900) +PairPlot(y, factor, opts) + +open data4-10.gdt --quiet +list y = ENROLL CATHOL +PairPlot(y, REGION, _(filename = "sample6.png")) diff --git a/tests/run_tests.inp b/tests/run_tests.inp index 5a965f3..c583fff 100644 --- a/tests/run_tests.inp +++ b/tests/run_tests.inp @@ -7,24 +7,6 @@ include "./src/PairPlot.inp" --force -bundles Params = null -Params = Params + _(type = "triangle", expected = "write_plot_cmd_and_data_tri") -Params = Params + _(type = "matrix", expected = "write_plot_cmd_and_data_mat") -Params = Params + _(type = "row", expected = "write_plot_cmd_and_data_mat") -Params = Params + _(type = "column", expected = "write_plot_cmd_and_data_mat") -function void test_type_to_function_map (const bundles P) - print "Start testing type_to_function_map()." 
- - loop foreach i P - # Given + When - actual = type_to_function_map(P[i].type) - - # Then - assert(actual == P[i].expected) - endloop -end function -test_type_to_function_map(Params) - bundles Params = null Params = Params + _(pointtype = {1, 2}', n_factors = 1, expected = TRUE) @@ -62,82 +44,6 @@ end function test_isok_length_pointtype_vec(Params) -bundles Params = null -Params = Params + _(filename = "foo.svg", expected = "svg") -function void test_add_filetype (const bundles P) - print "Start testing add_filetype()" - - loop foreach i P - # Given - bundle B = null - B = _(filename = P[i].filename) - - # When - add_filetype(&B) - - # Then - assert(B.filetype == P[i].expected) - endloop -end function -test_add_filetype(Params) - - - -bundles Params = null -Params = Params + _(type = "triangle", n_variables = 2, expected = 1) -Params = Params + _(type = "column", n_variables = 3, expected = 2) -Params = Params + _(type = "matrix", n_variables = 3, expected = 3) -Params = Params + _(type = "row", n_variables = 3, expected = 1) -Params = Params + _(type = "row", n_variables = 2, expected = 1) -function void test_plottype_to_rows_map (const bundles P) - print "Start testing plottype_to_rows_map()" - - loop foreach i P - # Given + When - actual = plottype_to_rows_map(P[i]) - - # Then - assert(actual == P[i].expected) - endloop -end function -test_plottype_to_rows_map(Params) - - -bundles Params = null -Params = Params + _(type = "triangle", n_variables = 2, expected = 1) -Params = Params + _(type = "column", n_variables = 3, expected = 1) -Params = Params + _(type = "matrix", n_variables = 3, expected = 2) -Params = Params + _(type = "row", n_variables = 3, expected = 2) -function void test_plottype_to_cols_map (const bundles P) - print "Start testing plottype_to_cols_map()" - - loop foreach i P - # Given + When - actual = plottype_to_cols_map(P[i]) - - # Then - assert(actual == P[i].expected) - endloop -end function -test_plottype_to_cols_map(Params) - - -function 
void test_set_offsets (void) - print "Start testing function set_offsets()" - - # Given - scalar offset_level = 0.11 - - # When - string actual = set_offsets(offset_level) - string expected = sprintf("set offsets %g, %g, %g, %g",\ - offset_level, offset_level, offset_level, offset_level\ - ) - - # Then - assert(actual == expected) -end function -test_set_offsets() print "Start running and test examples" nulldata 128 @@ -178,32 +84,5 @@ loop i=1..5 endloop -function void test_write_linetype (void) - print "Start testing write_linetype()" - - # given - scalar TRANSPARENCY = 10 - strings color_definitions = defarray("A", "B", "C") - scalar n_variables = nelem(color_definitions) - strings expected = defarray(\ - "set linetype 1 lc rgb '# aA'",\ - "set linetype 2 lc rgb '# aB'",\ - "set linetype 3 lc rgb '# aC'"\ - ) - - # When - string actual = write_linetype(n_variables, color_definitions, TRANSPARENCY) - strings actuals = strsplit(actual, "\n") - - # Then - loop foreach i expected - assert(actuals[i] == expected[i]) - endloop -end function -test_write_linetype() - - - - print "Succesfully finished all tests." quit