How can I test whether two JSON objects are equal in python, disregarding the order of lists?
For example ...
JSON document a:
{
"errors": [
{"error": "invalid", "field": "email"},
{"error": "required", "field": "name"}
],
"success": false
}
JSON document b:
{
"success": false,
"errors": [
{"error": "required", "field": "name"},
{"error": "invalid", "field": "email"}
]
}
a
and b
should compare equal, even though the order of the "errors"
lists are different.
This question is related to
python
json
django
comparison
For the following two dicts 'dictWithListsInValue' and 'reorderedDictWithReorderedListsInValue' which are simply reordered versions of each other
dictObj = {"foo": "bar", "john": "doe"}
reorderedDictObj = {"john": "doe", "foo": "bar"}
dictObj2 = {"abc": "def"}
dictWithListsInValue = {'A': [{'X': [dictObj2, dictObj]}, {'Y': 2}], 'B': dictObj2}
reorderedDictWithReorderedListsInValue = {'B': dictObj2, 'A': [{'Y': 2}, {'X': [reorderedDictObj, dictObj2]}]}
a = {"L": "M", "N": dictWithListsInValue}
b = {"L": "M", "N": reorderedDictWithReorderedListsInValue}
print(sorted(a.items()) == sorted(b.items())) # gives false
gave me wrong result i.e. false .
So I created my own cutstom ObjectComparator like this:
def my_list_cmp(list1, list2):
if (list1.__len__() != list2.__len__()):
return False
for l in list1:
found = False
for m in list2:
res = my_obj_cmp(l, m)
if (res):
found = True
break
if (not found):
return False
return True
def my_obj_cmp(obj1, obj2):
if isinstance(obj1, list):
if (not isinstance(obj2, list)):
return False
return my_list_cmp(obj1, obj2)
elif (isinstance(obj1, dict)):
if (not isinstance(obj2, dict)):
return False
exp = set(obj2.keys()) == set(obj1.keys())
if (not exp):
# print(obj1.keys(), obj2.keys())
return False
for k in obj1.keys():
val1 = obj1.get(k)
val2 = obj2.get(k)
if isinstance(val1, list):
if (not my_list_cmp(val1, val2)):
return False
elif isinstance(val1, dict):
if (not my_obj_cmp(val1, val2)):
return False
else:
if val2 != val1:
return False
else:
return obj1 == obj2
return True
dictObj = {"foo": "bar", "john": "doe"}
reorderedDictObj = {"john": "doe", "foo": "bar"}
dictObj2 = {"abc": "def"}
dictWithListsInValue = {'A': [{'X': [dictObj2, dictObj]}, {'Y': 2}], 'B': dictObj2}
reorderedDictWithReorderedListsInValue = {'B': dictObj2, 'A': [{'Y': 2}, {'X': [reorderedDictObj, dictObj2]}]}
a = {"L": "M", "N": dictWithListsInValue}
b = {"L": "M", "N": reorderedDictWithReorderedListsInValue}
print(my_obj_cmp(a, b)) # gives true
which gave me the correct expected output!
Logic is pretty simple:
If the objects are of type 'list' then compare each item of the first list with the items of the second list until found , and if the item is not found after going through the second list , then 'found' would be = false. 'found' value is returned
Else if the objects to be compared are of type 'dict' then compare the values present for all the respective keys in both the objects. (Recursive comparison is performed)
Else simply call obj1 == obj2 . It by default works fine for the object of strings and numbers and for those eq() is defined appropriately .
(Note that the algorithm can further be improved by removing the items found in object2, so that the next item of object1 would not compare itself with the items already found in the object2)
You can write your own equals function:
a == b
Because you're dealing with json, you'll have standard python types: dict
, list
, etc., so you can do hard type checking if type(obj) == 'dict':
, etc.
Rough example (not tested):
def json_equals(jsonA, jsonB):
if type(jsonA) != type(jsonB):
# not equal
return False
if type(jsonA) == dict:
if len(jsonA) != len(jsonB):
return False
for keyA in jsonA:
if keyA not in jsonB or not json_equal(jsonA[keyA], jsonB[keyA]):
return False
elif type(jsonA) == list:
if len(jsonA) != len(jsonB):
return False
for itemA, itemB in zip(jsonA, jsonB):
if not json_equal(itemA, itemB):
return False
else:
return jsonA == jsonB
Another way could be to use json.dumps(X, sort_keys=True)
option:
import json
a, b = json.dumps(a, sort_keys=True), json.dumps(b, sort_keys=True)
a == b # a normal string comparison
This works for nested dictionaries and lists.
Decode them and compare them as mgilson comment.
Order does not matter for dictionary as long as the keys, and values matches. (Dictionary has no order in Python)
>>> {'a': 1, 'b': 2} == {'b': 2, 'a': 1}
True
But order is important in list; sorting will solve the problem for the lists.
>>> [1, 2] == [2, 1]
False
>>> [1, 2] == sorted([2, 1])
True
>>> a = '{"errors": [{"error": "invalid", "field": "email"}, {"error": "required", "field": "name"}], "success": false}'
>>> b = '{"errors": [{"error": "required", "field": "name"}, {"error": "invalid", "field": "email"}], "success": false}'
>>> a, b = json.loads(a), json.loads(b)
>>> a['errors'].sort()
>>> b['errors'].sort()
>>> a == b
True
Above example will work for the JSON in the question. For general solution, see Zero Piraeus's answer.
For others who'd like to debug the two JSON objects (usually, there is a reference and a target), here is a solution you may use. It will list the "path" of different/mismatched ones from target to the reference.
level
option is used for selecting how deep you would like to look into.
show_variables
option can be turned on to show the relevant variable.
def compareJson(example_json, target_json, level=-1, show_variables=False):
_different_variables = _parseJSON(example_json, target_json, level=level, show_variables=show_variables)
return len(_different_variables) == 0, _different_variables
def _parseJSON(reference, target, path=[], level=-1, show_variables=False):
if level > 0 and len(path) == level:
return []
_different_variables = list()
# the case that the inputs is a dict (i.e. json dict)
if isinstance(reference, dict):
for _key in reference:
_path = path+[_key]
try:
_different_variables += _parseJSON(reference[_key], target[_key], _path, level, show_variables)
except KeyError:
_record = ''.join(['[%s]'%str(p) for p in _path])
if show_variables:
_record += ': %s <--> MISSING!!'%str(reference[_key])
_different_variables.append(_record)
# the case that the inputs is a list/tuple
elif isinstance(reference, list) or isinstance(reference, tuple):
for index, v in enumerate(reference):
_path = path+[index]
try:
_target_v = target[index]
_different_variables += _parseJSON(v, _target_v, _path, level, show_variables)
except IndexError:
_record = ''.join(['[%s]'%str(p) for p in _path])
if show_variables:
_record += ': %s <--> MISSING!!'%str(v)
_different_variables.append(_record)
# the actual comparison about the value, if they are not the same, record it
elif reference != target:
_record = ''.join(['[%s]'%str(p) for p in path])
if show_variables:
_record += ': %s <--> %s'%(str(reference), str(target))
_different_variables.append(_record)
return _different_variables
Source: Stackoverflow.com