From f7f439ed4fc6567d176eaa467298377d561ca01a Mon Sep 17 00:00:00 2001 From: Artur Tarassow Date: Sun, 9 Jun 2024 18:43:56 +0200 Subject: [PATCH 01/11] add function checking filename; start some refactoring --- src/PairPlot.inp | 50 ++++++++++++++++++++++++++++++++---------------- 1 file changed, 33 insertions(+), 17 deletions(-) diff --git a/src/PairPlot.inp b/src/PairPlot.inp index ba84f3b..c33e008 100644 --- a/src/PairPlot.inp +++ b/src/PairPlot.inp @@ -33,6 +33,14 @@ function string get_plot_type (const bundle self) return type end function +function scalar is_valid_filename (const string filename) + if filename == "" + printf "\nError: Invalid file name.\n" + return FALSE + endif + return TRUE +end function + function scalar PairPlot (const list L "List of variables to plot", const series factor[null] "Discrete series for factorization", @@ -40,24 +48,13 @@ function scalar PairPlot (const list L "List of variables to plot", /* Main public function. return: TRUE on error, otherwise FALSE. */ - bundle self - if exists(opts) - self = opts - endif + bundle self = exists(opts) ? opts : _() self = self + default_options() + self.n_variables = nelem(L) - errorif($sysinfo.gui_mode == FALSE && self.filename == "display", - "the 'output=display`-mode is not supported with gretlcli, please use the GUI program instead or store the plot, if possible") - - # Some checks - if nelem(self.filename) == 0 - printf "\nError: Invalid file name.\n" - return TRUE - endif - if !has_min_two_series(L) + if !is_valid_filename(self.filename) || !has_min_two_series(L) return TRUE endif - self.n_variables = nelem(L) # Adjust fontsize as a function of the number of variables if !exists(opts) || inbundle(opts, "fontsize") == FALSE @@ -81,11 +78,14 @@ function scalar PairPlot (const list L "List of variables to plot", # Handle eventual missing values list ALL = L - ALL += exists(factor) ? 
factor : null + if exists(factor) + ALL += factor + endif smpl ALL --no-missing # Set some global(s) string self.plot_type = get_plot_type(self) + scalar self.n_plots = get_n_plots(self.type, self.n_variables) if exists(factor) add_information_of_factor(factor, &self) @@ -93,8 +93,8 @@ function scalar PairPlot (const list L "List of variables to plot", scalar self.n_factors = 1 matrix self.factor_values = {1} endif - code = isok_length_pointtype_vector(self) - if !code + + if !isok_length_pointtype_vector(self) return TRUE endif @@ -108,6 +108,22 @@ function scalar PairPlot (const list L "List of variables to plot", end function +function scalar get_n_plots (const string type, int nvars) + /* Figure the total number of plots required. */ + + scalar n1 = nvars - 1 + + if type == "triangle" + return n1 * nvars / 2 + elif type == "matrix" + return n1 * nvars + else # row or column + return n1 + endif +end function + + + function string type_to_function_map (const string type) /* Map type to function name. */ From d6f8a9fe422fc1a6fa5164e03782f7002368b0e0 Mon Sep 17 00:00:00 2001 From: Artur Tarassow Date: Sun, 9 Jun 2024 18:56:31 +0200 Subject: [PATCH 02/11] remove unnecessary functions --- src/PairPlot.inp | 135 ----------------------------------------------- 1 file changed, 135 deletions(-) diff --git a/src/PairPlot.inp b/src/PairPlot.inp index c33e008..753065b 100644 --- a/src/PairPlot.inp +++ b/src/PairPlot.inp @@ -124,29 +124,6 @@ end function -function string type_to_function_map (const string type) - /* Map type to function name. */ - - map =_(\ - triangle = "write_plot_cmd_and_data_tri",\ - matrix = "write_plot_cmd_and_data_mat",\ - row = "write_plot_cmd_and_data_mat",\ - column = "write_plot_cmd_and_data_mat"\ - ) - - return map["@type"] -end function - -function string compile_gnuplot_cmd_data_buffer (const list L, - const bundle self) - /* Compile gnuplot commands and data buffer for plotting. 
*/ - - string funcname = type_to_function_map(self.type) - - return feval(funcname, L, self) -end function - - function void GUI_PairPlot (const list L "List of variables to plot", const series factor[null] "Discrete series for factorization", int type[1:4:1] "Plot type" {"triangle", "matrix", "row", "column"}) @@ -170,19 +147,6 @@ function void GUI_PairPlot (const list L "List of variables to plot", end function -function void add_filetype (bundle *self) - /* Determine type for plot file. */ - - if self.filename == "display" - self.filetype = "png" - else - scalar length = strlen(self.filename) - string s = substr(self.filename, (length - 4), length) - string tmp = strstr(s, ".") - string self.filetype = strsub(tmp, ".", "") - endif -end function - function scalar isok_length_pointtype_vector (const bundle self) /* Make sure length of 'pointtype' vector is at least as long the number of distinct factor values. @@ -200,53 +164,6 @@ end function -function scalar plottype_to_rows_map (const bundle self) - /* Determine number of rows of multiplot depending on plot type. */ - - map =_(\ - triangle = self.n_variables - 1, - column = self.n_variables - 1, - matrix = self.n_variables, - row = 1\ - ) - - return map[self.type] -end function - - -function scalar plottype_to_cols_map (const bundle self) - /* Determine number of columns of multiplot depending on plot type. */ - - if self.type != "column" - return self.n_variables - 1 - else - return 1 - endif -end function - - -function string write_settings (bundle *self) - /* Write various settings to a string buffer. 
*/ - - string buffer - buffer += construct_terminal_cmd(self) - - buffer += write_linetype(self.n_variables * self.n_factors, - get_color_definitions(), - self.transparency_level) - - buffer += sprintf("set multiplot layout %d,%d\n", - plottype_to_rows_map(self), plottype_to_cols_map(self)) - - buffer += sprintf("set style line 101 lc rgb '#808080' lt 1 lw 1\n") - # drop right and upper border - buffer += sprintf("set border 3 front ls 101\n") - buffer += write_options(self) - - return buffer -end function - - function bundle default_options (void) /* Default parameter values. */ @@ -651,58 +568,6 @@ function strings get_color_definitions (void) end function -function string write_linetype (const int n_variables[1::], - const strings color_definitions, - const int transparency_level[0:255:0]) - /* Print linetype commands. */ - - string linetype_out = "" - scalar counter = 1 - - outfile --buffer=linetype_out - loop i=1..n_variables -q - printf "set linetype %d lc rgb '#%2x%s'\n", - $i, transparency_level, color_definitions[counter] - - # Repeat colors if needed - counter = (counter == nelem(color_definitions)) ? 1 : (counter + 1) - endloop - end outfile - - return linetype_out -end function - - -function string filetype_to_terminal_map (const string filetype) - /* Return gnuplot terminal name depending on filetype. */ - - map =_(\ - png = "pngcairo noenhanced",\ - pdf = "pdf noenhanced",\ - eps = "postscript eps color noenhanced",\ - svg ="svg noenhanced"\ - ) - - return map["@filetype"] -end function - - -function string construct_terminal_cmd (const bundle self) - /* Construct the terminal for output. 
*/ - - set force_decpoint on - - string set_cmd = "set terminal " - set_cmd += filetype_to_terminal_map(self.filetype) - set_cmd += sprintf(" font ',%d' ", self.fontsize) - set_cmd += sprintf(" size %g, %g\n", self.width, self.height) - - set force_decpoint off - - return set_cmd -end function - - function scalar calculate_pointsize (const scalar default_pointsize, const int n_variables) /* Compute optimal fontsizeo as a function of the number From 900dba1dc79129ea39abcd32252673a468bb3e7f Mon Sep 17 00:00:00 2001 From: Artur Tarassow Date: Sun, 9 Jun 2024 19:32:21 +0200 Subject: [PATCH 03/11] update spec file --- src/PairPlot.spec | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/PairPlot.spec b/src/PairPlot.spec index aba731a..f82c561 100644 --- a/src/PairPlot.spec +++ b/src/PairPlot.spec @@ -1,11 +1,12 @@ author = Artur Tarassow email = atecon@posteo.de -version = 0.98 -date = 2024-04-24 +version = 0.99 +date = 2024-06-10 description = Scatterplot matrix with factor separation tags = C88 -min-version = 2022d +min-version = 2024a gui-main = GUI_PairPlot +ui-maker = PP_ui_maker label = Pair Plot menu-attachment = MAINWIN/View/GraphVars public = PairPlot GUI_PairPlot From 2da1f366320c953a13c411be3c9c55f77318c79e Mon Sep 17 00:00:00 2001 From: Artur Tarassow Date: Sun, 9 Jun 2024 19:32:40 +0200 Subject: [PATCH 04/11] update test script --- tests/run_tests.inp | 121 -------------------------------------------- 1 file changed, 121 deletions(-) diff --git a/tests/run_tests.inp b/tests/run_tests.inp index 5a965f3..c583fff 100644 --- a/tests/run_tests.inp +++ b/tests/run_tests.inp @@ -7,24 +7,6 @@ include "./src/PairPlot.inp" --force -bundles Params = null -Params = Params + _(type = "triangle", expected = "write_plot_cmd_and_data_tri") -Params = Params + _(type = "matrix", expected = "write_plot_cmd_and_data_mat") -Params = Params + _(type = "row", expected = "write_plot_cmd_and_data_mat") -Params = Params + _(type = "column", expected = 
"write_plot_cmd_and_data_mat") -function void test_type_to_function_map (const bundles P) - print "Start testing type_to_function_map()." - - loop foreach i P - # Given + When - actual = type_to_function_map(P[i].type) - - # Then - assert(actual == P[i].expected) - endloop -end function -test_type_to_function_map(Params) - bundles Params = null Params = Params + _(pointtype = {1, 2}', n_factors = 1, expected = TRUE) @@ -62,82 +44,6 @@ end function test_isok_length_pointtype_vec(Params) -bundles Params = null -Params = Params + _(filename = "foo.svg", expected = "svg") -function void test_add_filetype (const bundles P) - print "Start testing add_filetype()" - - loop foreach i P - # Given - bundle B = null - B = _(filename = P[i].filename) - - # When - add_filetype(&B) - - # Then - assert(B.filetype == P[i].expected) - endloop -end function -test_add_filetype(Params) - - - -bundles Params = null -Params = Params + _(type = "triangle", n_variables = 2, expected = 1) -Params = Params + _(type = "column", n_variables = 3, expected = 2) -Params = Params + _(type = "matrix", n_variables = 3, expected = 3) -Params = Params + _(type = "row", n_variables = 3, expected = 1) -Params = Params + _(type = "row", n_variables = 2, expected = 1) -function void test_plottype_to_rows_map (const bundles P) - print "Start testing plottype_to_rows_map()" - - loop foreach i P - # Given + When - actual = plottype_to_rows_map(P[i]) - - # Then - assert(actual == P[i].expected) - endloop -end function -test_plottype_to_rows_map(Params) - - -bundles Params = null -Params = Params + _(type = "triangle", n_variables = 2, expected = 1) -Params = Params + _(type = "column", n_variables = 3, expected = 1) -Params = Params + _(type = "matrix", n_variables = 3, expected = 2) -Params = Params + _(type = "row", n_variables = 3, expected = 2) -function void test_plottype_to_cols_map (const bundles P) - print "Start testing plottype_to_cols_map()" - - loop foreach i P - # Given + When - actual = 
plottype_to_cols_map(P[i]) - - # Then - assert(actual == P[i].expected) - endloop -end function -test_plottype_to_cols_map(Params) - - -function void test_set_offsets (void) - print "Start testing function set_offsets()" - - # Given - scalar offset_level = 0.11 - - # When - string actual = set_offsets(offset_level) - string expected = sprintf("set offsets %g, %g, %g, %g",\ - offset_level, offset_level, offset_level, offset_level\ - ) - - # Then - assert(actual == expected) -end function -test_set_offsets() print "Start running and test examples" nulldata 128 @@ -178,32 +84,5 @@ loop i=1..5 endloop -function void test_write_linetype (void) - print "Start testing write_linetype()" - - # given - scalar TRANSPARENCY = 10 - strings color_definitions = defarray("A", "B", "C") - scalar n_variables = nelem(color_definitions) - strings expected = defarray(\ - "set linetype 1 lc rgb '# aA'",\ - "set linetype 2 lc rgb '# aB'",\ - "set linetype 3 lc rgb '# aC'"\ - ) - - # When - string actual = write_linetype(n_variables, color_definitions, TRANSPARENCY) - strings actuals = strsplit(actual, "\n") - - # Then - loop foreach i expected - assert(actuals[i] == expected[i]) - endloop -end function -test_write_linetype() - - - - print "Succesfully finished all tests." quit From 332df4be6a3760e76f8ed286864f9b687270372f Mon Sep 17 00:00:00 2001 From: Artur Tarassow Date: Sun, 9 Jun 2024 19:35:39 +0200 Subject: [PATCH 05/11] update help text --- src/PairPlot_help.md | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/PairPlot_help.md b/src/PairPlot_help.md index a43a3ee..9cae15b 100644 --- a/src/PairPlot_help.md +++ b/src/PairPlot_help.md @@ -44,6 +44,7 @@ The user can control the following aspects by adding the respective parameter to - `fontsize`: int, Control the font size of the labels (default: 16) . - `grid`: bool, Draw a grid in the background if TRUE (=1) (default: FALSE) - `height`: scalar, Height of the canvas plot (default: 600). 
+- `title`: string: set an overall title for the plot(s). - `key`: bool, If the `factor` series is provided, a legend shows the color and point pattern for each distinct value of the `factor` variable. Default: 1 (TRUE). - `key_fontsize` int, Control the font size for the key. Default: 14 - `key_position`: string, Controls the position of the legend in each subplot (use standard gnuplot options). default: "top left". @@ -65,8 +66,16 @@ The user can control the following aspects by adding the respective parameter to - `use_circles`: bool, Draw circles instead of points if set to 1 (TRUE), default: 0 (FALSE). - `width`: scalar, Width of the canvas plot (default: 900). + # Changelog +* **v0.99 (June 2024)** + * Add new parameter `title` for setting an overall title + * Internal refactoring: Switch to gretl's built-in gridplot apparatus which means that all the graphics file formats supported by gretl can be used. + * It also means that usage via gretlcli in "display" mode works. + * In case the user's specification calls for just a single plot, no "multiplot" is created. + * The graphical interface is enhanced, with more options. + * **v0.98 (April 2024)** * Make width, height, fontsizes and pointsize a function of the number of variables for better readability in case of many variables. * Set parameter `transparency per default to 90. 
From 823010b808664e748394364fcf86e5115ec18034 Mon Sep 17 00:00:00 2001 From: Artur Tarassow Date: Sun, 9 Jun 2024 19:48:33 +0200 Subject: [PATCH 06/11] update gitignore --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index fc73b8c..366fde0 100644 --- a/.gitignore +++ b/.gitignore @@ -2,3 +2,4 @@ session.inp *.lyx~ *.gp string_table.txt +*.png From af84e7ec2a51ae009951895fd903311405252ece Mon Sep 17 00:00:00 2001 From: Artur Tarassow Date: Sun, 9 Jun 2024 19:48:49 +0200 Subject: [PATCH 07/11] improve sample script --- src/PairPlot_sample.inp | 46 +++++++++++++++++++++++++---------------- 1 file changed, 28 insertions(+), 18 deletions(-) diff --git a/src/PairPlot_sample.inp b/src/PairPlot_sample.inp index 00e3d4f..da661db 100644 --- a/src/PairPlot_sample.inp +++ b/src/PairPlot_sample.inp @@ -1,30 +1,37 @@ -clear +clear --all set verbose off include PairPlot.gfn -scalar SAMPLE = 1 # Select an example +scalar SAMPLE = 1 # Select an example, 1 to 6 -if SAMPLE == 1 +if SAMPLE == 1 # simple plot of the iris data open iris.gdt --frompkg=PairPlot --quiet list y = 1..4 PairPlot(y) -elif SAMPLE == 2 # factorized scatterplots + centroids +elif SAMPLE == 2 # fancier plot of the iris data + open iris.gdt --frompkg=PairPlot --quiet + list y = 1..4 + bundle opts = _(transparency_level = 120, + title = "Iris characteristics by variety") + PairPlot(y, variety, opts) + +elif SAMPLE == 3 # factorized scatterplots + centroids open credscore.gdt --quiet list y = 1..3 - series factor = Selfempl - + series factor = OwnRent + 1 + stringify(factor, defarray("renter", "owner")) bundle opts = _(transparency_level = 100, + title = "Credit data by home-ownership status", centroid = "mean", grid = TRUE) PairPlot(y, factor, opts) -elif SAMPLE == 3 # factorized plus some tweaking +elif SAMPLE == 4 # factorized plus some tweaking open abdata --quiet list y = n k series factor = IND - bundle opts = _(transparency_level = 175, centroid = "median", tics = FALSE, @@ -33,24 +40,27 @@ 
elif SAMPLE == 3 # factorized plus some tweaking centroid_linewidth = 3, height = 600, width = 600) - PairPlot(y, factor, opts) -elif SAMPLE == 4 # factorized scatter + string-valued factor series +elif SAMPLE == 5 # factorized scatter + string-valued factor series open mrw --quiet - list y = gdp60 gdp85 school + list y = gdp60 gdp85 i_y series factor = 1 + OECD - strings svalues = defarray("non-oecd", "oecd") - stringify(factor, svalues) - + stringify(factor, defarray("non-oecd", "oecd")) + # cut out some extreme outliers + smpl nonoil --dummy bundle opts = _(centroid = "mean", transparency_level = 150, type = "matrix", grid = TRUE, height = 800, - width = 800, - filename = "foo.png") # store locally + width = 900, + fontsize = 16, + key_fontsize = 11) PairPlot(y, factor, opts) -endif - +elif SAMPLE == 6 # string-valued factor series, using defaults + open data4-10.gdt --quiet + list y = ENROLL CATHOL + PairPlot(y, REGION) +endif From a02e22af9c6420202abbce7711509fe6508518bb Mon Sep 17 00:00:00 2001 From: Artur Tarassow Date: Sun, 9 Jun 2024 19:51:41 +0200 Subject: [PATCH 08/11] add new test script which creates multiple png-files --- run_tests.sh | 6 +++-- tests/pngtests.inp | 62 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 66 insertions(+), 2 deletions(-) create mode 100644 tests/pngtests.inp diff --git a/run_tests.sh b/run_tests.sh index c022f82..8e63751 100755 --- a/run_tests.sh +++ b/run_tests.sh @@ -2,13 +2,15 @@ set -e DIR=$(dirname $(realpath "$0")) # locate folder where this sh-script is located in -SCRIPT="./tests/run_tests.inp" +SCRIPT_1="./tests/run_tests.inp" +SCRIPT_2="./tests/pngtests.inp" PROJECT="PairPlot" cd $DIR echo "Switched to ${DIR}" -gretlcli -b -e -q ${SCRIPT} +gretlcli -b -e -q ${SCRIPT_1} +gretlcli -b -e -q ${SCRIPT_2} if [ $? 
-eq 0 ] then diff --git a/tests/pngtests.inp b/tests/pngtests.inp new file mode 100644 index 0000000..fe30881 --- /dev/null +++ b/tests/pngtests.inp @@ -0,0 +1,62 @@ +# test script that can be run via gretlcli to produce PNG output +# for all the cases in PairPlot_sample.inp + +clear --all +set verbose off + +include PairPlot.gfn + +open iris.gdt --frompkg=PairPlot --quiet +list y = 1..4 +eval PairPlot(y, null, _(filename = "sample1.png")) + +list y = 1..4 +bundle opts = _(transparency_level = 120, + filename = "sample2.png", + title = "Iris characteristics by variety") +eval PairPlot(y, variety, opts) + +open credscore.gdt --quiet +list y = MDR..Age +series factor = OwnRent + 1 +stringify(factor, defarray("renter", "owner")) +bundle opts = _(transparency_level = 100, + filename = "sample3.png", + title = "Credit data by home-ownership status", + centroid = "mean", + grid = TRUE) +PairPlot(y, factor, opts) + +open abdata --quiet +list y = n k +series factor = IND +bundle opts = _(transparency_level = 175, + filename = "sample4.png", + centroid = "median", + tics = FALSE, + pointsize = 1.5, + centroid_pointsize = 3, + centroid_linewidth = 3, + height = 600, + width = 600) +PairPlot(y, factor, opts) + +open mrw --quiet +list y = gdp60 gdp85 i_y +series factor = 1 + OECD +strings svalues = defarray("non-oecd", "oecd") +stringify(factor, svalues) +# cut out some extreme outliers +smpl nonoil --dummy +bundle opts = _(centroid = "mean", + filename = "sample5.png", + transparency_level = 150, + type = "matrix", + grid = TRUE, + height = 800, + width = 900) +PairPlot(y, factor, opts) + +open data4-10.gdt --quiet +list y = ENROLL CATHOL +PairPlot(y, REGION, _(filename = "sample6.png")) From c1b3ba9c04f8e36945d0fca3c3501828092aae7f Mon Sep 17 00:00:00 2001 From: Artur Tarassow Date: Sun, 9 Jun 2024 19:56:25 +0200 Subject: [PATCH 09/11] add Allin as co-author --- src/PairPlot.spec | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/PairPlot.spec 
b/src/PairPlot.spec index f82c561..eaa4a7f 100644 --- a/src/PairPlot.spec +++ b/src/PairPlot.spec @@ -1,4 +1,4 @@ -author = Artur Tarassow +author = Artur Tarassow and Allin Cottrell email = atecon@posteo.de version = 0.99 date = 2024-06-10 From b232fbee863ea3e1b5cf0a89f90a29b94e25ddbe Mon Sep 17 00:00:00 2001 From: Artur Tarassow Date: Sun, 9 Jun 2024 19:57:06 +0200 Subject: [PATCH 10/11] refactoring of main functions --- src/PairPlot.inp | 548 ++++++++++++++++++++++++++--------------------- 1 file changed, 305 insertions(+), 243 deletions(-) diff --git a/src/PairPlot.inp b/src/PairPlot.inp index 753065b..09cd6f4 100644 --- a/src/PairPlot.inp +++ b/src/PairPlot.inp @@ -98,13 +98,85 @@ function scalar PairPlot (const list L "List of variables to plot", return TRUE endif - self.filename = exists(filename) ? filename : self.filename - add_filetype(&self) + if self.n_plots > 1 + return call_gridplot(L, self) + else + return call_gnuplot(L, self) + endif +end function - string buffer = write_settings(&self) - string buffer += compile_gnuplot_cmd_data_buffer(L, self) - return send_to_gnuplot(buffer, self.filename) +function string get_gridplot_opts (const bundle self) + /* construct an options string for the "gridplot" command */ + + strings S = array(0) + string opts = "" + + if self.type == "triangle" + S += "--layout=lmat" + elif self.type == "row" + S += "--rows=1" + elif self.type == "column" + S += "--cols=1" + endif + if self.width != 800 + S += sprintf("--width=%d", self.width) + endif + if self.height != 600 + S += sprintf("--height=%d", self.height) + endif + if inbundle(self, "title") + S += sprintf("--title=\"%s\"", self.title) + endif + + if nelem(S) > 0 + opts = S[1] + endif + + loop i=2..nelem(S) + opts += " " + opts += S[i] + endloop + + return opts +end function + +function string get_gp_opts (const bundle self) + /* construct an options string for gretl's "gnuplot" command */ + + string opts = "" + if self.transparency_level > 0 + set force_decpoint on 
+ opts += sprintf("--alpha=%g", self.transparency_level / 255) + set force_decpoint off + endif + # TODO: font size choice? + return opts +end function + + +function scalar call_gridplot (const list L, const bundle self) + /* Called if there's more than one component plot. */ + + string outspec = self.filename + string gp_opts = get_gp_opts(self) + string grid_opts = get_gridplot_opts(self) + matrix lmat + + if self.type == "triangle" + lmat = get_layout_matrix(nelem(L) - 1) + endif + + gpbuild plotspecs + write_pp_plots(L, self, gp_opts) + end gpbuild + + catch gridplot plotspecs @grid_opts --output="@outspec" + scalar err = $error + if err + printf "Error in call_gridplot: %s\n", errmsg(err) + endif + return err end function @@ -122,40 +194,41 @@ function scalar get_n_plots (const string type, int nvars) endif end function +function bundle PP_ui_maker (void) + /* set a specification for the first argument for GUI_PairPlot */ + + bundle b + b.L = _(no_singleton=1, n_const=1) + return b +end function -function void GUI_PairPlot (const list L "List of variables to plot", +function void GUI_PairPlot (const list L "List of series to plot", const series factor[null] "Discrete series for factorization", - int type[1:4:1] "Plot type" {"triangle", "matrix", "row", "column"}) - /* Helper function for GUI access. */ - bundle opts = null - - if type == 1 - string opts.type = "triangle" - elif type == 2 - string opts.type = "matrix" - elif type == 3 - string opts.type = "row" - elif type == 4 - string opts.type = "column" - endif - if exists(factor) - scalar err = PairPlot(L, factor, opts) - else - scalar err = PairPlot(L, null, opts) - endif + int type[1:4:1] "Plot type" {"triangle", "matrix", "row", "column"}, + int centroids[1:3:1] "Centroids" {"none", "mean", "median"}, + scalar transparency[0:1:0:0.1] "Transparency [0-1]") + /* Callback for PairPlot menu item. 
*/ + + bundle opts + strings typestrs = defarray("triangle", "matrix", "row", "column") + strings cstrs = defarray("none", "mean", "median") + + opts.type = typestrs[type] + opts.centroid = cstrs[centroids] + opts.transparency_level = 255 * transparency + PairPlot(L, factor, opts) end function - function scalar isok_length_pointtype_vector (const bundle self) - /* Make sure length of 'pointtype' vector is at least as long the number of distinct factor values. - return: TRUE of length of 'pointtype' vector is at least as long the number of distinct factor values; otherwise FALSE*/ - - if inbundle(self, "n_factors") && nelem(self.pointtype) < self.n_factors - printf "\nError: 'factor' series has %d distinct values but you\n\ - have set a vector 'pointtype' with only %d elements. Abort.\n\n",\ - self.n_factors, nelem(self.pointtype) + /* Make sure the 'pointtype' vector has at least as many elements as + there are distinct factor values. Return TRUE if so, FALSE otherwise + */ + if inbundle(self, "n_factors") && (nelem(self.pointtype) < self.n_factors) + printf "\nError: 'factor' series has %d distinct values but you\n", self.n_factors + printf "have set a vector 'pointtype' with only %d elements. Abort.\n\n", + nelem(self.pointtype) return FALSE endif @@ -163,11 +236,10 @@ function scalar isok_length_pointtype_vector (const bundle self) end function - function bundle default_options (void) /* Default parameter values. 
*/ - bundle self = null + bundle self scalar self.height = 600 scalar self.width = 900 @@ -196,129 +268,69 @@ function bundle default_options (void) end function -function scalar send_to_gnuplot (const string buffer, - const string filename) - /* Write buffer to temp file and send to gnuplot */ - - string mytemp - outfile --tempfile=mytemp --quiet - print buffer - end outfile - - catch gnuplot --input="@mytemp" --output="@filename" - - return $error -end function - - -function string write_plot_cmd_and_data_mat (const list L, - const bundle self) - /* For case self.type=="matrix" case */ +function string write_pp_plots (const list L, + const bundle self, + const string gp_opts) + /* Common code for the single-plot and multiplot cases. + - In the single case we construct one plot-specification buffer + and pass it back to call_gnuplot() for execution. + - In the multiplot case we construct two or more plot specs and + pass them in turn to gretl's "gnuplot" command, in the context + of a gpbuild block in call_gridplot(). In this case the return + value of @buffer is ignored. + */ - string BUFFER = "" + scalar tri = self.type == "triangle" + string buffer = "" + scalar n = nelem(L) + scalar imin = tri ? 
2 : 1 + scalar jmax = n + scalar k = 0 set force_decpoint on - loop foreach y L -q # Row dimension - - loop foreach x L -q # Col dimension - if varname(L.$y) != varname(L.$x) - - string buffer = "" - list yx = null - list yx = L.$x L.$y - strings vnames = varnames(yx) - - outfile --buffer=buffer - write_labels(vnames[2], vnames[1], self.fontsize) - factorized_centroids(yx, self) - if y > 2 # show key only for 1st pairplot - printf "set nokey\n" - endif - write_plot_cmd(self) - write_plot_data(yx, self) - end outfile - - BUFFER += buffer + loop i=imin..n + if tri + jmax = i-1 + endif + loop j=1..jmax + if j == i + continue + endif + k++ + buffer = "" + list xy = L[j] L[i] + strings vnames = varnames(xy) + outfile --buffer=buffer + write_options(self, xy, k) + write_labels(vnames, self.fontsize) + if self.centroid != "none" + factorized_centroids(xy, self) + endif + write_plot_cmd(self) + write_plot_data(xy, self) + end outfile + if self.n_plots > 1 # using gridplot + gnuplot --inbuf=buffer @gp_opts endif endloop - - if self.type == "row" || self.type == "column" + if k == self.n_plots break endif endloop set force_decpoint off - BUFFER += sprintf("unset multiplot") - - return BUFFER -end function - - -function string write_plot_cmd_and_data_tri (const list L, - const bundle self) - /* For self.type=="triangle" case */ - - string BUFFER = "" - set force_decpoint on - - # 1) Loop over all possible combinations - # -> Each one starts with a new 'plot' cmd - # 2) For each combination, consider the n_factors - - scalar y_counter = 0 - - loop foreach y L -q # Row dimension - y_counter++ - - scalar x_counter = 0 - - loop foreach x L -q # Column dimension - if y_counter == 1 - break - endif - - x_counter++ - - if varname(L.$y) != varname(L.$x) - string buffer = "" - list yx = null - list yx = L.$x L.$y - strings vnames = varnames(yx) - - if y_counter > x_counter - outfile --buffer=buffer - write_labels(vnames[2], vnames[1], self.fontsize) - factorized_centroids(yx, self) - 
if y_counter > 2 # show key only for 1st pairplot - printf "set nokey\n" - endif - write_plot_cmd(self) - write_plot_data(yx, self) - end outfile - else - buffer = sprintf("set multiplot next\n") - endif - - BUFFER += buffer - endif - endloop # end loop over x - endloop # end loop over y - set force_decpoint off - - BUFFER += sprintf("unset multiplot") - - return BUFFER + return buffer end function -function void factorized_centroids (const list yx, bundle self) +function void factorized_centroids (const list xy, bundle self) /* This function calculates and writes the centroids for each factor of a given list of variables. The function loops over the factors, restricts the sample to the observations where the factor series equals the current factor value, and then calls the 'write_centroids' function to calculate and write the centroids. Parameters: - yx: A list of variables for which the centroids are to be calculated. + xy: A list of variables for which the centroids are to be calculated. self: A bundle containing various options for the function. It should have the following fields: - 'n_factors': The number of factors. - 'factor_series': The series of factors. @@ -329,92 +341,86 @@ function void factorized_centroids (const list yx, bundle self) Note: This function assumes that the 'write_centroids' function is defined in the same scope. */ - loop i=1..self.n_factors -q + + strings colors = get_color_definitions() + + loop i=1..self.n_factors smpl full if self.n_factors > 1 smpl self.factor_series == self.factor_values[i] --restrict endif - smpl --no-missing yx + smpl --no-missing xy scalar self.ith_factor = i - write_centroids(yx, self) + write_centroids(xy, self, colors) endloop end function -function void write_centroids (const list yx, const bundle self) +function void write_centroids (const list xy, const bundle self, + const strings colors) /* This function calculates and writes the centroids for a given list of variables. 
The type of centroid (mean or median) is determined by the 'centroid' field in the 'self' bundle. If the 'centroid' field is set to 'none', the function does nothing. Parameters: - yx: A list of variables for which the centroids are to be calculated. + xy: A list of variables for which the centroids are to be calculated. self: A bundle containing various options for the function. It should have the following fields: - 'centroid': A string that determines the type of centroid to calculate. It can be 'mean', 'median', or 'none'. - 'centroid_label': A boolean that determines whether to label the centroids. If true, the centroids are labeled with their type ('Mean' or 'Median'). If false, no label is added. - 'centroid_pointsize': An integer that determines the point size of the centroids in the plot. - - The function calculates the centroids by looping over the variables in 'yx' and calculating their mean or median, depending on the 'centroid' field in 'self'. The centroids are then written to the plot with a label (if 'centroid_label' is true) and a specified point size ('centroid_pointsize'). The centroids are colored red. - - Note: This function does not return anything. */ - if self.centroid != "none" - matrix centroids = mshape(NA, 2, 1) # x,y - - loop loop foreach i yx - if self.centroid == "mean" - centroids[i] = mean(yx.$i) - elif self.centroid == "median" - centroids[i] = median(yx.$i) - endif - endloop - if self.centroid == "mean" - string label = self.centroid_label == TRUE ? "Mean" : "" - elif self.centroid == "median" - string label = self.centroid_label == TRUE ? 
"Median" : "" - endif + matrix cxy = zeros(1, 2) + string label = "" - if self.n_factors == 1 - string color = "FF0000" # color code "red" - else - string color = get_color_definitions()[self.ith_factor] + if self.centroid == "mean" + cxy = {mean(xy[1]), mean(xy[2])} + if self.centroid_label + label = "Mean" + endif + else + cxy = {median(xy[1]), median(xy[2])} + if self.centroid_label + label = "Median" endif + endif - # Draw some symbol for the i-th centroid - printf "set label %d \"%s\" at %g,%g point ps %g pt %d lw %g", - self.ith_factor, label, centroids[1], centroids[2], - self.centroid_pointsize, self.pointtype[self.ith_factor], - self.centroid_linewidth - # Color of centroid equals the color of the i-th factor - printf " lc rgb '#%s' front\n", color + if self.n_factors == 1 # no factors + string color = "FF0000" + else + string color = colors[self.ith_factor] endif + + # Draw a symbol (optionally with a label) for the i-th centroid + printf "set label %d \"%s\" at %g,%g point ps %g pt %d lw %g", + self.ith_factor, label, cxy[1], cxy[2], self.centroid_pointsize, + self.pointtype[self.ith_factor], self.centroid_linewidth + + # Color of centroid equals the color of the i-th factor + printf " lc rgb '#%s' front\n", color end function -function void write_labels (const string yname, - const string xname, - const int fontsize) - printf "set ylabel '%s' font ',%d'\n", yname, fontsize - printf "set xlabel '%s' font ',%d'\n", xname, fontsize +function void write_labels (const strings vnames, const int fontsize) + printf "set xlabel '%s' font ',%d'\n", vnames[1], fontsize + printf "set ylabel '%s' font ',%d'\n", vnames[2], fontsize end function -function void write_plot_data (const list yx, const bundle self) +function void write_plot_data (const list xy, const bundle self) /* Write data for a single subplot. 
*/ - strings S = array(0) - loop i=1..self.n_factors -q smpl full if self.n_factors > 1 smpl self.factor_series == self.factor_values[i] --restrict endif - smpl --no-missing yx + smpl --no-missing xy - matrix m = {yx} # TODO: for huge data, this may become a bottleneck - cnameset(m, "") # avoid printing variable names to the gp-file - printf "%12.8g", m - printf "\ne\n" + matrix m = {xy} # TODO: for huge data, this may become a bottleneck + cnameset(m, "") # avoid printing variable names to the gp-file + printf "%14.8g", m # width 14 to avoid collisions + printf "e\n" endloop end function @@ -425,20 +431,20 @@ function void print_using_cmd (const bundle self, const int idx) printf "'-' using 1:2 w %s", self.plot_type if self.plot_type == "points" - printf " pt %d", self.pointtype[idx] + printf " pt %d", self.pointtype[idx] endif end function -function void print_plot_title (const bundle self, const int idx) - /* Print title string. */ +function void print_factor_title (const bundle self, const int idx) + /* Print title string for a factor. */ if inbundle(self, "factor_str_values") # If factor series comprises strings, these are used printf " title '%s'", self.factor_str_values[idx] else if self.n_factors == 1 - print " title ''" + print " notitle" else printf " title 'F=%d'", self.factor_values[idx] endif @@ -453,7 +459,7 @@ function void write_plot_cmd (const bundle self) loop i=1..self.n_factors print_using_cmd(self, $i) - print_plot_title(self, $i) + print_factor_title(self, $i) if i < self.n_factors printf ", \\\n" @@ -464,67 +470,93 @@ function void write_plot_cmd (const bundle self) end function -function string set_offsets (const scalar offset_level) - /* Write settings for offsets - return: string, offsets parameters */ +function void write_offsets (const scalar offset_level) + /* Write settings for offsets */ - string r = "set offsets " - - loop i=1..4 -q - r += sprintf("%.2f", offset_level) - r += i < 4 ? 
", " : "" + printf "set offsets " + loop i=1..4 + printf "%.2f", offset_level + printf "%s", i < 4 ? ", " : "" endloop - - return r + printf "\n" end function -function string write_options (const bundle self) - /* Write gnuplot options to buffer and return as string. */ +function void write_key_spec (const bundle self, const list xy, + const scalar k) + /* TBA */ - set force_decpoint on - - string buffer + string pos = self.key_position + scalar do_font = 1 - outfile --buffer=buffer - printf "%s\n", set_offsets(self.offset_level) - - if self.with_key - printf "set key %s font ',%d'\n", self.key_position, - self.key_fontsize + if self.type == "triangle" && self.n_plots > 2 + /* We can avoid collision between the key and actual data + points by writing the key into a blank region of the grid, + for the first plot only. In this case use the regular + plot font. + */ + if k == 1 + pos = "at screen 0.6, screen 0.95" + do_font = 0 else - printf "set nokey\n" + pos = "" endif - if self.tics - printf "set xtics font ',%d'\n", self.tics_fontsize - printf "set ytics font ',%d' \n", self.tics_fontsize + elif self.type == "matrix" && self.n_plots > 4 + /* Write just one key (?), and try for best position to + avoid collision with data + */ + if k == 1 + if pos == "top left" && corr(xy[1], xy[2]) < -0.2 + pos = "top right" + endif else - printf "set noxtics\n" - printf "set noytics\n" + pos = "" endif + endif - if self.grid - printf "set style line 12 lc rgb '#808080' lt 0 lw 1\n" - printf "set grid ls 12\n" - endif + if pos == "" + printf "set nokey\n" + elif do_font == 0 + printf "set key %s\n", pos + else + printf "set key %s font ',%d'\n", pos, self.key_fontsize + endif +end function - printf "set pointsize %g\n", self.pointsize - # No documented yet, as it errors - /* - if ok(self.y_logscale) - printf "set logscale y %d\n", self.y_logscale - endif - if ok(self.x_logscale) - printf "set logscale x %d\n", self.x_logscale - endif - */ +function void write_options (const 
bundle self, const list xy, + scalar k) + /* Write gnuplot options. */ - end outfile + write_offsets(self.offset_level) - set force_decpoint off + if self.with_key + write_key_spec(self, xy, k) + else + printf "set nokey\n" + endif + if self.tics + printf "set xtics font ',%d'\n", self.tics_fontsize + printf "set ytics font ',%d' \n", self.tics_fontsize + else + printf "set noxtics\n" + printf "set noytics\n" + endif + if self.grid + printf "set style line 12 lc rgb '#808080' lt 0 lw 1\n" + printf "set grid ls 12\n" + endif + printf "set pointsize %g\n", self.pointsize - return buffer + # Not documented yet, as it errors + /* + if ok(self.y_logscale) + printf "set logscale y %d\n", self.y_logscale + endif + if ok(self.x_logscale) + printf "set logscale x %d\n", self.x_logscale + endif + */ end function @@ -558,19 +590,19 @@ function strings get_color_definitions (void) "C41E3A",\ "4682B4",\ "FFBF00",\ - "ff0000",\ - "00cc00",\ - "8faab3",\ - "0000ff",\ - "bf25b2",\ - "ffa500"\ + "FF0000",\ + "00CC00",\ + "8FAAB3",\ + "0000FF",\ + "BF25B2",\ + "FFA500"\ ) end function function scalar calculate_pointsize (const scalar default_pointsize, const int n_variables) - /* Compute optimal fontsizeo as a function of the number + /* Compute optimal pointsize as a function of the number of variables to plot. */ scalar pointsize = xmin(5, default_pointsize / sqrt(n_variables)) @@ -580,7 +612,7 @@ end function function scalar calculate_fontsize (const scalar default_fontsize, const int n_variables) - /* Compute optimal fontsizeo as a function of the number + /* Compute optimal fontsize as a function of the number of variables to plot. 
*/ scalar fontsize = xmin(20, default_fontsize / sqrt(n_variables)) @@ -608,3 +640,33 @@ function scalar calculate_plot_height (const scalar default_height, return height end function + + +function matrix get_layout_matrix (int n) + /* construct a suitable triangular layout matrix */ + + matrix ret = zeros(n, n) + scalar k = 1 + loop i=1..n + loop j=1..i + ret[i,j] = k++ + endloop + endloop + return ret +end function + + +function scalar call_gnuplot (const list L, const bundle self) + /* Called if just a single plot is to be produced. */ + + string outspec = self.filename + string buffer = write_pp_plots(L, self, "") + string gp_opts = get_gp_opts(self) + + catch gnuplot --inbuf=buffer @gp_opts --output="@outspec" + scalar err = $error + if err + printf "Error in call_gnuplot: %s\n", errmsg(err) + endif + return err +end function From 5ebdf389dc4657ca27f42d630ffa03073cde2378 Mon Sep 17 00:00:00 2001 From: Artur Tarassow Date: Wed, 3 Jul 2024 17:33:26 +0200 Subject: [PATCH 11/11] fix typo; update date --- README.md | 9 +++++++++ src/PairPlot.spec | 2 +- src/PairPlot_help.md | 4 ++-- src/PairPlot_sample.inp | 2 +- 4 files changed, 13 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index a43a3ee..d294327 100644 --- a/README.md +++ b/README.md @@ -44,6 +44,7 @@ The user can control the following aspects by adding the respective parameter to - `fontsize`: int, Control the font size of the labels (default: 16) . - `grid`: bool, Draw a grid in the background if TRUE (=1) (default: FALSE) - `height`: scalar, Height of the canvas plot (default: 600). +- `title`: string: set an overall title for the plot(s). - `key`: bool, If the `factor` series is provided, a legend shows the color and point pattern for each distinct value of the `factor` variable. Default: 1 (TRUE). - `key_fontsize` int, Control the font size for the key. Default: 14 - `key_position`: string, Controls the position of the legend in each subplot (use standard gnuplot options). 
default: "top left". @@ -65,8 +66,16 @@ The user can control the following aspects by adding the respective parameter to - `use_circles`: bool, Draw circles instead of points if set to 1 (TRUE), default: 0 (FALSE). - `width`: scalar, Width of the canvas plot (default: 900). + # Changelog +* **v0.99 (July 2024)** + * Add new parameter `title` for setting an overall title + * Internal refactoring: Switch to gretl's built-in gridplot apparatus which means that all the graphics file formats supported by gretl can be used. + * It also means that usage via gretlcli in "display" mode works. + * In case the user's specification calls for just a single plot, no "multiplot" is created. + * The graphical interface is enhanced, with more options. + * **v0.98 (April 2024)** * Make width, height, fontsizes and pointsize a function of the number of variables for better readability in case of many variables. * Set parameter `transparency per default to 90. diff --git a/src/PairPlot.spec b/src/PairPlot.spec index eaa4a7f..36a02da 100644 --- a/src/PairPlot.spec +++ b/src/PairPlot.spec @@ -1,7 +1,7 @@ author = Artur Tarassow and Allin Cottrell email = atecon@posteo.de version = 0.99 -date = 2024-06-10 +date = 2024-07-03 description = Scatterplot matrix with factor separation tags = C88 min-version = 2024a diff --git a/src/PairPlot_help.md b/src/PairPlot_help.md index 9cae15b..d294327 100644 --- a/src/PairPlot_help.md +++ b/src/PairPlot_help.md @@ -69,8 +69,8 @@ The user can control the following aspects by adding the respective parameter to # Changelog -* **v0.99 (June 2024)** - * Add new parameter `title` for setting an oveall title +* **v0.99 (July 2024)** + * Add new parameter `title` for setting an overall title * Internal refactoring: Switch to gretl's built-in gridplot aparatus which means that all the graphics file formats supported by gretl can be used. * It also means that usage via gretlcli in "display" mode works. *
* In case the user's specification calls for just a single plot, no "multiplot" is created. diff --git a/src/PairPlot_sample.inp b/src/PairPlot_sample.inp index da661db..8ddbd49 100644 --- a/src/PairPlot_sample.inp +++ b/src/PairPlot_sample.inp @@ -3,7 +3,7 @@ set verbose off include PairPlot.gfn -scalar SAMPLE = # Select an example, 1 to 6 +scalar SAMPLE = 1 # Select an example, 1 to 6 if SAMPLE == 1 # simple plot of the iris data open iris.gdt --frompkg=PairPlot --quiet