Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix issue 377 #394

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
93 changes: 85 additions & 8 deletions src/example/org/deidentifier/arx/examples/Example39.java
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,14 @@

import java.io.File;
import java.io.FilenameFilter;
import java.io.IOError;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.security.SecureRandom;
import java.text.ParseException;
import java.util.HashSet;
import java.util.Random;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

Expand All @@ -32,16 +37,24 @@
import org.deidentifier.arx.AttributeType;
import org.deidentifier.arx.AttributeType.Hierarchy;
import org.deidentifier.arx.Data;
import org.deidentifier.arx.DataSubset;
import org.deidentifier.arx.DataType;
import org.deidentifier.arx.aggregates.ClassificationConfigurationLogisticRegression;
import org.deidentifier.arx.aggregates.ClassificationConfigurationNaiveBayes;
import org.deidentifier.arx.aggregates.ClassificationConfigurationRandomForest;
import org.deidentifier.arx.criteria.Inclusion;
import org.deidentifier.arx.criteria.KAnonymity;
import org.deidentifier.arx.io.CSVHierarchyInput;
import org.deidentifier.arx.metric.Metric;

/**
* This class implements an example on how to compare data mining performance
*
* The evaluation can be performed either with k-fold cross-validation (the default) or
* with one subset for training and a different subset for testing
*
* @author Fabian Prasser
* @author Florian Kohlmayer
* @author Ibraheem Al-Dhamari
*/
public class Example39 extends Example {

Expand Down Expand Up @@ -83,7 +96,25 @@ public boolean accept(File dir, String name) {

return data;
}

/**
 * Draws a random subset of record indices by sampling each record independently.
 * <p>
 * Every index in {@code [0, numRecords)} is included when a fresh random draw from
 * {@code [0, 1)} is smaller than {@code dataSize}. The size of the returned set is
 * therefore only approximately {@code dataSize * numRecords}.
 *
 * @param dataSize   per-record inclusion probability, must lie in {@code [0, 1]}
 * @param inputData  not read by this method; kept so that existing callers keep compiling
 * @param numRecords number of records to sample from; a non-positive value yields an empty set
 * @return the selected record indices (possibly empty)
 * @throws IllegalArgumentException if {@code dataSize} is NaN or outside {@code [0, 1]}
 */
public static Set<Integer> getRandomDataSubsetIndices(double dataSize, Data inputData, int numRecords) {

    // The negated form also rejects NaN, which would otherwise pass a "< 0 || > 1" check
    // and silently produce an empty subset.
    if (!(dataSize >= 0d && dataSize <= 1d)) {
        throw new IllegalArgumentException("Data size ratio must be within [0, 1] but was " + dataSize);
    }

    // Sample each record index independently with probability dataSize
    Set<Integer> subsetIndices = new HashSet<Integer>();
    Random random = new SecureRandom();
    for (int i = 0; i < numRecords; i++) {
        if (random.nextDouble() < dataSize) {
            subsetIndices.add(i);
        }
    }
    return subsetIndices;
}

/**
* Entry point.
*
Expand Down Expand Up @@ -111,18 +142,64 @@ public static void main(String[] args) throws ParseException, IOException {
data.getDefinition().setDataType("age", DataType.INTEGER);
data.getDefinition().setResponseVariable("marital-status", true);


ARXAnonymizer anonymizer = new ARXAnonymizer();

ARXConfiguration config = ARXConfiguration.create();
config.addPrivacyModel(new KAnonymity(5));
config.setSuppressionLimit(1d);
config.setQualityModel(Metric.createClassificationMetric());

// Create a training subset data with a specific percentage of the original data e.g 80%
double dataSize = 0.80;

// Creating a view from the original dataset
Set<Integer> subsetIndicesTrain = getRandomDataSubsetIndices(dataSize, data, data.getHandle().getNumRows()) ;
DataSubset datasubTrain = DataSubset.create(data.getHandle().getNumRows(), subsetIndicesTrain);

// Adding the data subset to the current configuration,
// this subset will be used for the anonymization,
// other records will be transformed but only suppressed,
// In the training, only the subset will be used
config.addPrivacyModel(new Inclusion (datasubTrain) );

config.setSuppressionLimit(1d);
config.setQualityModel(Metric.createClassificationMetric());

// Start the anonymization process
ARXResult result = anonymizer.anonymize(data, config);
System.out.println("5-anonymous dataset (logistic regression)");
System.out.println(result.getOutput().getStatistics().getClassificationPerformance(features, clazz, ARXClassificationConfiguration.createLogisticRegression()));
System.out.println("5-anonymous dataset (naive bayes)");
System.out.println(result.getOutput().getStatistics().getClassificationPerformance(features, clazz, ARXClassificationConfiguration.createNaiveBayes()));
System.out.println("5-anonymous dataset (random forest)");
System.out.println(result.getOutput().getStatistics().getClassificationPerformance(features, clazz, ARXClassificationConfiguration.createRandomForest()));

System.out.println("===============================================");
System.out.println(" 5-anonymous dataset (logistic regression)");
System.out.println("===============================================");
ClassificationConfigurationLogisticRegression logisticClassifier = ARXClassificationConfiguration.createLogisticRegression();
System.out.println("Evaluation using K-fold cross validation: ...............");
logisticClassifier.setEvaluateWithKfold(true);
System.out.println(result.getOutput().getStatistics().getClassificationPerformance(features, clazz, logisticClassifier));
System.out.println("Evaluation using testing subset: ........................");
logisticClassifier.setEvaluateWithKfold(false);
System.out.println(result.getOutput().getStatistics().getClassificationPerformance(features, clazz, logisticClassifier));

System.out.println("===============================================");
System.out.println(" 5-anonymous dataset (naive bayes)");
System.out.println("===============================================");
// Toggle the evaluation mode on the naive Bayes configuration that is actually passed to
// getClassificationPerformance(); switching the logistic regression configuration here
// would leave both evaluations below running with the same naive Bayes settings.
// NOTE(review): assumes setEvaluateWithKfold is declared on ClassificationConfigurationNaiveBayes
// or one of its supertypes - confirm.
ClassificationConfigurationNaiveBayes naiveBayesClassifier = ARXClassificationConfiguration.createNaiveBayes();
System.out.println("Evaluation using K-fold cross validation: ...............");
naiveBayesClassifier.setEvaluateWithKfold(true);
System.out.println(result.getOutput().getStatistics().getClassificationPerformance(features, clazz, naiveBayesClassifier));
System.out.println("Evaluation using testing subset: ........................");
naiveBayesClassifier.setEvaluateWithKfold(false);
System.out.println(result.getOutput().getStatistics().getClassificationPerformance(features, clazz, naiveBayesClassifier));

System.out.println("===============================================");
System.out.println(" 5-anonymous dataset (random forest)");
System.out.println("===============================================");
// Toggle the evaluation mode on the random forest configuration that is actually passed to
// getClassificationPerformance(); switching the logistic regression configuration here
// would leave both evaluations below running with the same random forest settings.
// NOTE(review): assumes setEvaluateWithKfold is declared on ClassificationConfigurationRandomForest
// or one of its supertypes - confirm.
ClassificationConfigurationRandomForest randomForestClassifier = ARXClassificationConfiguration.createRandomForest();
System.out.println("Evaluation using K-fold cross validation: ...............");
randomForestClassifier.setEvaluateWithKfold(true);
System.out.println(result.getOutput().getStatistics().getClassificationPerformance(features, clazz, randomForestClassifier));
System.out.println("Evaluation using testing subset: ........................");
randomForestClassifier.setEvaluateWithKfold(false);
System.out.println(result.getOutput().getStatistics().getClassificationPerformance(features, clazz, randomForestClassifier));
}
}
}
22 changes: 22 additions & 0 deletions src/gui/org/deidentifier/arx/gui/model/Model.java
Original file line number Diff line number Diff line change
Expand Up @@ -304,6 +304,9 @@ public static enum Perspective {
/** Model */
private ModelClassification classificationModel = new ModelClassification();

/** Select K-fold or training/testing subsets for evaluation */
private Boolean showKfoldEvaluation = null;

/* *****************************************
* Information about the last anonymization process
* ******************************************/
Expand Down Expand Up @@ -1343,6 +1346,17 @@ public boolean isSensitiveAttributeSelected() {
return (getInputDefinition().getAttributeType(getSelectedAttribute()) == AttributeType.SENSITIVE_ATTRIBUTE);
}

/**
 * Returns whether K-fold evaluation is selected.
 * <p>
 * If the flag has not been set yet (it is still {@code null}), it is initialized
 * to {@code true} before being returned.
 *
 * @return {@code true} if K-fold evaluation is selected, {@code false} otherwise
 */
public boolean isKfoldEvaluation() {
    // An unset flag defaults to K-fold evaluation
    if (this.showKfoldEvaluation == null) {
        this.showKfoldEvaluation = Boolean.TRUE;
    }
    return this.showKfoldEvaluation.booleanValue();
}

/**
* Returns whether visualization is enabled.
*
Expand Down Expand Up @@ -1812,6 +1826,14 @@ public void setSnapshotSizeSnapshot(final double snapshotSize) {
snapshotSizeSnapshot = snapshotSize;
}

/**
 * Stores whether classification performance is evaluated with K-fold
 * cross validation or with training/testing subsets.
 *
 * @param showKfoldEvaluation {@code true} to select K-fold evaluation,
 *                            {@code false} to select training/testing subsets
 */
public void setShowKfoldEvaluation(boolean showKfoldEvaluation) {
    this.showKfoldEvaluation = Boolean.valueOf(showKfoldEvaluation);
}

/**
* Sets how the subset was defined.
*/
Expand Down
5 changes: 4 additions & 1 deletion src/gui/org/deidentifier/arx/gui/model/ModelEvent.java
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,10 @@ public static enum ModelPart {
RESPONSE_VARIABLES,

/** ATTRIBUTE_TYPE */
ATTRIBUTE_TYPE_BULK_UPDATE
ATTRIBUTE_TYPE_BULK_UPDATE,

/** SHOW K_FOLD EVALUATION */
SHOW_KFOLD_EVALUATION
}

/** The part of the model that has changed. */
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -665,6 +665,7 @@ StatisticsView.9=Classification models
StatisticsView.10=Classification performance
StatisticsView.12=ROC curves
StatisticsView.11=Quality models
StatisticsView.13=Show K-fold evaluation
WorkerAnonymize.0=Anonymizing
WorkerAnonymize.1=Task interrupted\!
WorkerAnonymize.2=Calculating score
Expand Down
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Original file line number Diff line number Diff line change
Expand Up @@ -72,16 +72,20 @@ public class LayoutUtilityStatistics implements ILayout, IView {
private static final String TAB_CLASSIFICATION_ANALYSIS = Resources.getMessage("StatisticsView.9"); //$NON-NLS-1$

/** View */
private final ComponentTitledFolder folder;
private final ComponentTitledFolder folder;

/** View */
private final ToolItem enable;
private final ToolItem chkboxVisualization; //enable

/** View */
private final ToolItem showKfold;


/** View */
private final Image enabled;
private final Image iconVisualizationEnabled;//enabled

/** View */
private final Image disabled;
private final Image iconVisualizationDisabled; //disabled

/** View */
private final Map<Composite, String> helpids = new HashMap<Composite, String>();
Expand All @@ -108,21 +112,32 @@ public LayoutUtilityStatistics(final Composite parent,
final ModelPart target,
final ModelPart reset) {

this.enabled = controller.getResources().getManagedImage("tick.png"); //$NON-NLS-1$
this.disabled = controller.getResources().getManagedImage("cross.png"); //$NON-NLS-1$
this.iconVisualizationEnabled = controller.getResources().getManagedImage("tick.png"); //$NON-NLS-1$
this.iconVisualizationDisabled = controller.getResources().getManagedImage("cross.png"); //$NON-NLS-1$
this.controller = controller;

controller.addListener(ModelPart.MODEL, this);
controller.addListener(ModelPart.SELECTED_UTILITY_VISUALIZATION, this);
controller.addListener(ModelPart.SHOW_KFOLD_EVALUATION, this);

// Create show K-fold button
final String kfoldButtonLabel = Resources.getMessage("StatisticsView.13"); //$NON-NLS-1$

// Create enable/disable button
final String label = Resources.getMessage("StatisticsView.3"); //$NON-NLS-1$
ComponentTitledFolderButtonBar bar = new ComponentTitledFolderButtonBar("id-50", helpids); //$NON-NLS-1$
bar.add(label, disabled, true, new Runnable() { @Override public void run() {
toggleEnabled();
toggleImage();
}});
bar.add(kfoldButtonLabel, controller.getResources().getManagedImage("tickKFold.png"), true, //$NON-NLS-1$
new Runnable() {
@Override public void run() {
toggleShowKfold();
}});


// Create visualization enable/disable button
final String visializationButtonLabel = Resources.getMessage("StatisticsView.3"); //$NON-NLS-1$
bar.add(visializationButtonLabel, iconVisualizationDisabled, true, new Runnable() { @Override public void run() {
toggleVisualizationEnabled();
toggleVisualizationIcon();
}});

// Create the tab folder
folder = new ComponentTitledFolder(parent, controller, bar, null, false, true);

Expand All @@ -143,8 +158,10 @@ public LayoutUtilityStatistics(final Composite parent,

// Init folder
this.folder.setSelection(0);
this.enable = folder.getButtonItem(label);
this.enable.setEnabled(false);
this.showKfold = folder.getButtonItem(kfoldButtonLabel);
this.showKfold.setEnabled(true);
this.chkboxVisualization = folder.getButtonItem(visializationButtonLabel);
this.chkboxVisualization.setEnabled(false);

// Set initial visibility
folder.setVisibleItems(Arrays.asList(new String[] { TAB_SUMMARY,
Expand Down Expand Up @@ -188,9 +205,9 @@ public List<String> getVisibleItems() {
@Override
public void reset() {
model = null;
enable.setSelection(true);
enable.setImage(enabled);
enable.setEnabled(false);
chkboxVisualization.setSelection(true);
chkboxVisualization.setImage(iconVisualizationEnabled);
chkboxVisualization.setEnabled(false);
}

/**
Expand Down Expand Up @@ -227,13 +244,18 @@ public void update(ModelEvent event) {

if (event.part == ModelPart.MODEL) {
this.model = (Model)event.data;
this.enable.setEnabled(true);
this.enable.setSelection(model.isVisualizationEnabled());
this.toggleImage();
this.showKfold.setSelection(model.isKfoldEvaluation());
this.showKfold.setEnabled(true);
this.chkboxVisualization.setEnabled(true);
this.chkboxVisualization.setSelection(model.isVisualizationEnabled());
this.toggleVisualizationIcon();
} else if (event.part == ModelPart.SELECTED_UTILITY_VISUALIZATION) {
this.enable.setSelection(model.isVisualizationEnabled());
this.toggleImage();
}
this.chkboxVisualization.setSelection(model.isVisualizationEnabled());
this.toggleVisualizationIcon();
} else if (event.part == ModelPart.SHOW_KFOLD_EVALUATION) {
this.showKfold.setSelection(model.isKfoldEvaluation());
this.showKfold.setEnabled(true);
}
}

/**
Expand All @@ -259,19 +281,29 @@ private void registerView(ViewStatisticsBasic view, String helpid) {
/**
* Toggle visualization enabled.
*/
private void toggleEnabled() {
this.model.setVisualizationEnabled(this.enable.getSelection());
this.controller.update(new ModelEvent(this, ModelPart.SELECTED_UTILITY_VISUALIZATION, enable.getSelection()));
private void toggleVisualizationEnabled() {
this.model.setVisualizationEnabled(this.chkboxVisualization.getSelection());
this.controller.update(new ModelEvent(this, ModelPart.SELECTED_UTILITY_VISUALIZATION, chkboxVisualization.getSelection()));
}

/**
* Toggle image.
*/
private void toggleImage(){
if (enable.getSelection()) {
enable.setImage(enabled);
private void toggleVisualizationIcon(){
if (chkboxVisualization.getSelection()) {
chkboxVisualization.setImage(iconVisualizationEnabled);
} else {
enable.setImage(disabled);
chkboxVisualization.setImage(iconVisualizationDisabled);
}
}
}

/**
 * Applies the current state of the K-fold tool item: stores it in the model,
 * applies it to the current classification configuration and notifies
 * listeners via a {@code SHOW_KFOLD_EVALUATION} event.
 */
private void toggleShowKfold() {
    // Read the button state once and hand the same value to all three consumers
    final boolean useKfold = this.showKfold.getSelection();
    this.model.setShowKfoldEvaluation(useKfold);
    this.model.getClassificationModel()
              .getCurrentConfiguration()
              .setEvaluateWithKfold(useKfold);
    final ModelEvent event = new ModelEvent(this, ModelPart.SHOW_KFOLD_EVALUATION, useKfold);
    this.controller.update(event);
}

}
Loading