Skip to content

Commit

Permalink
Deterministic ids for POIs with multiple addresses
Browse files Browse the repository at this point in the history
  • Loading branch information
kiselev-dv committed Feb 2, 2017
1 parent bf3c864 commit 6b2bd1c
Show file tree
Hide file tree
Showing 9 changed files with 189 additions and 45 deletions.
8 changes: 8 additions & 0 deletions Gazetteer/src/main/java/me/osm/gazetter/LOGMarkers.java
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,17 @@

public class LOGMarkers {

// OSM Data validation markers

public static final Marker E_INTRPLTN_NO_ADDR_POINT
= MarkerFactory.getMarker("E_INTRPLTN_NO_ADDR_ON_POINT");

public static final Marker E_INTRPLTN_DIF_STREETS
= MarkerFactory.getMarker("E_INTRPLTN_DIF_STREETS");

public static final Marker E_WAY_ONLY_TWO_EQAL_POINTS
= MarkerFactory.getMarker("E_WAY_ONLY_TWO_EQAL_POINTS");

//----------------------------------------------------------

public static final Marker E_NO_POINTS_FOR_RELATION
Expand All @@ -30,5 +35,8 @@ public class LOGMarkers {

public static final Marker E_NO_ASSOCIATED_STREET_FOUND
= MarkerFactory.getMarker("E_NO_ASSOCIATED_STREET_FOUND");

public static final Marker E_INVALID_NAN_POI_PNT
= MarkerFactory.getMarker("E_INVALID_NAN_POI_PNT");

}
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@

import org.joda.time.DateTime;
import org.json.JSONObject;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
* Read Old file and get timestamps and md5 hashes of strings
Expand All @@ -16,6 +18,8 @@
*/
public final class DiffOldFileFirstPassReader implements LineHandler {

private static final Logger log = LoggerFactory.getLogger(DiffOldFileFirstPassReader.class);

private final Counters counters;
private TreeMap<String, Object[]> map;

Expand All @@ -41,7 +45,11 @@ public void handle(String s) {
DateTime timestamp = GeoJsonWriter.getTimestamp(s);
String md5 = GeoJsonWriter.getMD5(s);

map.put(id, new Object[]{md5, timestamp});
Object[] previous = map.put(id, new Object[]{md5, timestamp});
if (previous != null) {
log.warn("Different lines with the same id. first md5: {} second md5: {} id: {}",
previous[0], md5, id);
}
}
}
}
133 changes: 105 additions & 28 deletions Gazetteer/src/main/java/me/osm/gazetter/join/JoinSliceRunable.java
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,8 @@ public class JoinSliceRunable implements Runnable {

private boolean dropHghNetGeometries;

private boolean checkDoubledPOIIds = false;


/**
* @param handler
Expand Down Expand Up @@ -355,8 +357,15 @@ private long debug(String msg, long s) {
@SuppressWarnings("unchecked")
private void mergePois() {
for(JSONObject poi : pois) {

// Was already merged, just skip
if("remove".equals(poi.optString("action"))) {
continue;
}

JSONObject meta = poi.getJSONObject(GeoJsonWriter.META);
JSONObject fullGeometry = meta.optJSONObject("fullGeometry");

if(fullGeometry != null && "Polygon".equals(fullGeometry.getString("type"))) {
Polygon poly = GeoJsonWriter.getPolygonGeometry(
fullGeometry.getJSONArray(GeoJsonWriter.COORDINATES));
Expand All @@ -365,60 +374,127 @@ private void mergePois() {
if(dubles.size() > 1) {
//remove self
dubles.remove(poi);

// match by type
filterMatchedPois(poi, dubles);
JSONObject poiProperties = poi.getJSONObject(GeoJsonWriter.PROPERTIES);

Iterator<JSONObject> iterator = dubles.iterator();
while(iterator.hasNext()) {
JSONObject matched = iterator.next();
JSONArray coords = matched.getJSONObject(GeoJsonWriter.GEOMETRY).getJSONArray(GeoJsonWriter.COORDINATES);
if (dubles.size() != 0) {

Point centroid = factory.createPoint(new Coordinate(coords.getDouble(0), coords.getDouble(1)));
if(!poly.contains(centroid)) {
iterator.remove();
continue;
}

matched.put("action", "remove");
String poiId = poi.getString("id");
matched.put("actionDetailed",
"Remove merged with polygonal boundary poi point." + poiId);
JSONObject poiProperties = poi.getJSONObject(GeoJsonWriter.PROPERTIES);

JSONObject matchedProperties = matched.getJSONObject(GeoJsonWriter.PROPERTIES);
for(String key : (Set<String>)matchedProperties.keySet()) {
if(!poiProperties.has(key)) {
poiProperties.put(key, matchedProperties.get(key));
Iterator<JSONObject> iterator = dubles.iterator();
while(iterator.hasNext()) {
JSONObject matched = iterator.next();
JSONArray coords = matched.getJSONObject(GeoJsonWriter.GEOMETRY).getJSONArray(GeoJsonWriter.COORDINATES);

// Check that point actually fell inside polygon
Point centroid = factory.createPoint(new Coordinate(coords.getDouble(0), coords.getDouble(1)));
if(!poly.contains(centroid)) {
iterator.remove();
continue;
}

matched.put("action", "remove");
String poiId = poi.getString("id");
matched.put("actionDetailed",
"Remove merged with polygonal boundary poi point." + poiId);

JSONObject matchedProperties = matched.getJSONObject(GeoJsonWriter.PROPERTIES);
for(String key : (Set<String>)matchedProperties.keySet()) {
if(!poiProperties.has(key)) {
poiProperties.put(key, matchedProperties.get(key));
}
}
}

reassignPOICentroid(poi, dubles);

}
}
}
}

for(Iterator<JSONObject> i = pois.iterator();i.hasNext(); ) {
if("remove".equals(i.next().optString("action"))) {
i.remove();
}
}

if (checkDoubledPOIIds) {
Collections.sort(pois, new Comparator<JSONObject>() {

@Override
public int compare(JSONObject o1, JSONObject o2) {
String id1 = o1.getString("id");
String id2 = o2.getString("id");

//move center to the poi point instead of centroid
if(dubles.size() == 1) {
JSONArray coords = dubles.get(0).getJSONObject(GeoJsonWriter.GEOMETRY).getJSONArray(GeoJsonWriter.COORDINATES);
poi.getJSONObject(GeoJsonWriter.GEOMETRY).put(GeoJsonWriter.COORDINATES, coords);
int strcmp = id1.compareTo(id2);
if (strcmp == 0) {
o1.toString().compareTo(o2.toString());
}

return strcmp;
}

});

String lastId = null;
for(Iterator<JSONObject> i = pois.iterator();i.hasNext(); ) {
String id = i.next().getString("id");
if(id.equals(lastId)) {
i.remove();
}
else {
lastId = id;
}
}
}
}

/**
 * Moves the POI center onto one of the merged POI points instead of
 * the polygon centroid.
 *
 * When several points were merged, the one whose coordinates array has
 * the lexicographically smallest JSON text is used, so the result does
 * not depend on the order of {@code dubles}. An empty list leaves the
 * POI untouched.
 *
 * @param poi    POI whose geometry coordinates are overwritten
 * @param dubles merged POI points to take the coordinates from
 */
private void reassignPOICentroid(JSONObject poi, List<JSONObject> dubles) {
    if (dubles.isEmpty()) {
        return;
    }

    JSONArray selected = null;
    for (JSONObject merged : dubles) {
        JSONObject mergedGeometry = merged.getJSONObject(GeoJsonWriter.GEOMETRY);
        JSONArray candidate = mergedGeometry.getJSONArray(GeoJsonWriter.COORDINATES);

        // Keep the smallest serialized coordinates seen so far.
        boolean smaller = selected == null
                || candidate.toString().compareTo(selected.toString()) < 0;
        if (smaller) {
            selected = candidate;
        }
    }

    JSONObject poiGeometry = poi.getJSONObject(GeoJsonWriter.GEOMETRY);
    poiGeometry.put(GeoJsonWriter.COORDINATES, selected);
}

private void filterMatchedPois(JSONObject poi,
List<JSONObject> dubles) {

String clazz = poi.getJSONArray("poiTypes").getString(0);
JSONArray subjTypes = poi.getJSONArray("poiTypes");

Iterator<JSONObject> iterator = dubles.iterator();
while (iterator.hasNext()) {

JSONObject candidate = iterator.next();
if(!clazz.equals(candidate.getJSONArray("poiTypes"))) {
JSONArray candidateTypes = candidate.getJSONArray("poiTypes");
if(!intersects(subjTypes, candidateTypes)) {
iterator.remove();
}
}
}

/**
 * Tells whether the two arrays share at least one equal string element.
 *
 * Elements are read with {@code getString}, subject types in the outer
 * pass and candidate types in the inner pass; the first match ends the
 * search.
 *
 * @param subjTypes      types of the POI being matched
 * @param candidateTypes types of the candidate POI
 * @return {@code true} if some string occurs in both arrays
 */
private boolean intersects(JSONArray subjTypes, JSONArray candidateTypes) {
    int subjIdx = 0;
    while (subjIdx < subjTypes.length()) {
        String wanted = subjTypes.getString(subjIdx);

        int candidateIdx = 0;
        while (candidateIdx < candidateTypes.length()) {
            String offered = candidateTypes.getString(candidateIdx);
            if (wanted.equals(offered)) {
                return true;
            }
            candidateIdx++;
        }

        subjIdx++;
    }
    return false;
}

private void initializeMaps() {
addr2streets = new HashMap<JSONObject, List<JSONObject>>(addrPoints.size());
addr2bndries = new HashMap<JSONObject, List<JSONObject>>(addrPoints.size());
Expand Down Expand Up @@ -717,6 +793,7 @@ private void joinPoi2Addresses() {

void handleOut(JSONObject poi) {
if(poi != null) {

for(JoinOutHandler handler : Options.get().getJoinOutHandlers()) {
try {
handler.handle(poi, this.src.getName());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

import me.osm.gazetter.out.AddrRowValueExctractorImpl;
import me.osm.gazetter.striper.FeatureTypes;
import me.osm.gazetter.striper.JSONFeature;

import org.json.JSONArray;
import org.json.JSONObject;
Expand Down Expand Up @@ -287,15 +288,22 @@ private boolean getAddressesFromCollection(List<JSONObject> result,

boolean founded = false;

JSONArray contains = joinedAddresses.optJSONArray("contains");
JSONArray contains = joinedAddresses.optJSONArray(key);
if(contains != null && contains.length() > 0) {

for(int ci = 0; ci < contains.length(); ci++) {
JSONObject co = contains.getJSONObject(ci);
JSONArray addresses = co.optJSONArray("addresses");
if(addresses != null) {
for(int i = 0; i < addresses.length(); i++) {
result.add(addresses.getJSONObject(i));
JSONObject rowJsonObject = JSONFeature.copy(addresses.getJSONObject(i));
String id = co.optString("id");

if (id != null) {
rowJsonObject.put("linked-addr-id", id);
}

result.add(rowJsonObject);
founded = true;
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -450,6 +450,7 @@ protected void handleAdminBoundaryAddrRow(JSONObject object,
protected boolean fillObject(JSONFeature result, JSONObject addrRow, JSONObject jsonObject) {

try {

String ftype = jsonObject.getString("ftype");
String rowId = AddrRowValueExctractorImpl.getUID(jsonObject, addrRow, ftype);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,9 @@
import me.osm.gazetter.addresses.AddressesSchemesParser;
import me.osm.gazetter.striper.FeatureTypes;
import me.osm.gazetter.striper.GeoJsonWriter;
import me.osm.gazetter.utils.JSONHash;

import org.apache.commons.lang3.StringUtils;
import org.json.JSONObject;

public class AddrRowValueExctractorImpl implements AddrRowValueExtractor {
Expand Down Expand Up @@ -144,10 +146,18 @@ public static String getUID(JSONObject jsonObject, JSONObject addrRow,
return jsonObject.getString("id") + "--" + addrType;
}

// A POI can have more than one address with the same addrType
if(FeatureTypes.POI_FTYPE.equals(ftype)) {
String addrType = addrRow.optString(AddressesSchemesParser.ADDR_SCHEME);
String baseId = jsonObject.getString("id");

String linkedId = addrRow.optString("linked-addr-id");

return jsonObject.getString("id") + "--" + addrType;
if (StringUtils.isNotEmpty(linkedId)) {
baseId += "-" + StringUtils.split(linkedId,'-')[2];
}

return baseId + "--" + addrType;
}

return jsonObject.getString("id");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;

import me.osm.gazetter.LOGMarkers;
import me.osm.gazetter.Options;
import me.osm.gazetter.dao.FileWriteDao;
import me.osm.gazetter.dao.WriteDao;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -214,9 +214,17 @@ public int compare(ByteBuffer row, ByteBuffer key) {

if (handler != null) {
if (coords.size() > 1) {
LineString linestring = factory.createLineString(coords
.toArray(new Coordinate[coords.size()]));
handler.handleHighway(linestring, line);

if(coords.size() == 2 && coords.get(0).equals(coords.get(1))) {
log.warn(LOGMarkers.E_WAY_ONLY_TWO_EQAL_POINTS,
"Way has only two eqal poins. id_way_osm({})", line.id);
}
else {
LineString linestring = factory.createLineString(coords
.toArray(new Coordinate[coords.size()]));
handler.handleHighway(linestring, line);
}

}
}
}
Expand Down
Loading

0 comments on commit 6b2bd1c

Please sign in to comment.