Optimize Object::GetObject. Introduce an array of aggregates and sort is by access frequency.
1.1 --- a/src/core/object.cc Wed Nov 04 22:08:21 2009 +0100
1.2 +++ b/src/core/object.cc Thu Nov 05 21:04:05 2009 +0100
1.3 @@ -28,6 +28,8 @@
1.4 #include "string.h"
1.5 #include <vector>
1.6 #include <sstream>
1.7 +#include <stdlib.h>
1.8 +#include <string.h>
1.9
1.10 NS_LOG_COMPONENT_DEFINE ("Object");
1.11
1.12 @@ -40,28 +42,23 @@
1.13 NS_OBJECT_ENSURE_REGISTERED (Object);
1.14
1.15 Object::AggregateIterator::AggregateIterator ()
1.16 - : m_first (0),
1.17 + : m_object (0),
1.18 m_current (0)
1.19 {}
1.20
1.21 bool
1.22 Object::AggregateIterator::HasNext (void) const
1.23 {
1.24 - if (m_current != 0 && m_current->m_next != PeekPointer (m_first))
1.25 - {
1.26 - return true;
1.27 - }
1.28 - return false;
1.29 + return m_current < m_object->m_aggregates->n;
1.30 }
1.31 Ptr<const Object>
1.32 Object::AggregateIterator::Next (void)
1.33 {
1.34 - m_current = m_current->m_next;
1.35 - return m_current;
1.36 + return m_object->m_aggregates->buffer[m_current];
1.37 }
1.38 -Object::AggregateIterator::AggregateIterator (Ptr<const Object> first)
1.39 - : m_first (first),
1.40 - m_current (first)
1.41 +Object::AggregateIterator::AggregateIterator (Ptr<const Object> object)
1.42 + : m_object (object),
1.43 + m_current (0)
1.44 {}
1.45
1.46
1.47 @@ -85,18 +82,45 @@
1.48 : m_count (1),
1.49 m_tid (Object::GetTypeId ()),
1.50 m_disposed (false),
1.51 - m_next (this)
1.52 -{}
1.53 + m_aggregates ((struct Aggregates *)malloc (sizeof (struct Aggregates))),
1.54 + m_getObjectCount (0)
1.55 +{
1.56 + m_aggregates->n = 1;
1.57 + m_aggregates->buffer[0] = this;
1.58 +}
1.59 Object::~Object ()
1.60 {
1.61 - m_next = 0;
1.62 + // remove this object from the aggregate list
1.63 + uint32_t n = m_aggregates->n;
1.64 + for (uint32_t i = 0; i < n; i++)
1.65 + {
1.66 + Object *current = m_aggregates->buffer[i];
1.67 + if (current == this)
1.68 + {
1.69 + memmove (&m_aggregates->buffer[i],
1.70 + &m_aggregates->buffer[i+1],
1.71 + sizeof (Object *)*(m_aggregates->n - (i+1)));
1.72 + m_aggregates->n--;
1.73 + }
1.74 + }
1.75 + // finally, if all objects have been removed from the list,
1.76 + // delete the aggregate list
1.77 + if (m_aggregates->n == 0)
1.78 + {
1.79 + free (m_aggregates);
1.80 + }
1.81 + m_aggregates = 0;
1.82 }
1.83 Object::Object (const Object &o)
1.84 : m_count (1),
1.85 m_tid (o.m_tid),
1.86 m_disposed (false),
1.87 - m_next (this)
1.88 -{}
1.89 + m_aggregates ((struct Aggregates *)malloc (sizeof (struct Aggregates))),
1.90 + m_getObjectCount (0)
1.91 +{
1.92 + m_aggregates->n = 1;
1.93 + m_aggregates->buffer[0] = this;
1.94 +}
1.95 uint32_t
1.96 Object::GetReferenceCount (void) const
1.97 {
1.98 @@ -112,48 +136,58 @@
1.99 Object::DoGetObject (TypeId tid) const
1.100 {
1.101 NS_ASSERT (CheckLoose ());
1.102 - const Object *currentObject = this;
1.103 - const Object *prevObject = 0;
1.104 +
1.105 + uint32_t n = m_aggregates->n;
1.106 TypeId objectTid = Object::GetTypeId ();
1.107 - do {
1.108 - NS_ASSERT (currentObject != 0);
1.109 - TypeId cur = currentObject->GetInstanceTypeId ();
1.110 - while (cur != tid && cur != objectTid)
1.111 - {
1.112 - cur = cur.GetParent ();
1.113 - }
1.114 - if (cur == tid)
1.115 - {
1.116 - if (prevObject != 0)
1.117 - {
1.118 - // This is an attempt to 'cache' the result of this lookup.
1.119 - // the idea is that if we perform a lookup for a TypdId on this object,
1.120 - // we are likely to perform the same lookup later so, we re-order
1.121 - // the circular linked-list of objects here by putting the object we
1.122 - // just found at the head of the list. This optimization is
1.123 - // _extremely_ effective in general.
1.124 - const_cast<Object*>(prevObject)->m_next = currentObject->m_next;
1.125 - const_cast<Object*>(currentObject)->m_next = m_next;
1.126 - const_cast<Object*>(this)->m_next = (Object*)currentObject;
1.127 - }
1.128 - return const_cast<Object *> (currentObject);
1.129 - }
1.130 - prevObject = currentObject;
1.131 - currentObject = currentObject->m_next;
1.132 - } while (currentObject != this);
1.133 + for (uint32_t i = 0; i < n; i++)
1.134 + {
1.135 + Object *current = m_aggregates->buffer[i];
1.136 + TypeId cur = current->GetInstanceTypeId ();
1.137 + while (cur != tid && cur != objectTid)
1.138 + {
1.139 + cur = cur.GetParent ();
1.140 + }
1.141 + if (cur == tid)
1.142 + {
1.143 + // This is an attempt to 'cache' the result of this lookup.
1.144 + // the idea is that if we perform a lookup for a TypeId on this object,
1.145 + // we are likely to perform the same lookup later so, we make sure
1.146 + // that the aggregate array is sorted by the number of accesses
1.147 + // to each object.
1.148 +
1.149 + // first, increment the access count
1.150 + current->m_getObjectCount++;
1.151 + // then, update the sort
1.152 + UpdateSortedArray (m_aggregates, i);
1.153 + // finally, return the match
1.154 + return const_cast<Object *> (current);
1.155 + }
1.156 + }
1.157 return 0;
1.158 }
1.159 void
1.160 Object::Dispose (void)
1.161 {
1.162 - Object *current = this;
1.163 - do {
1.164 - NS_ASSERT (current != 0);
1.165 - NS_ASSERT (!current->m_disposed);
1.166 - current->DoDispose ();
1.167 - current->m_disposed = true;
1.168 - current = current->m_next;
1.169 - } while (current != this);
1.170 + uint32_t n = m_aggregates->n;
1.171 + for (uint32_t i = 0; i < n; i++)
1.172 + {
1.173 + Object *current = m_aggregates->buffer[i];
1.174 + NS_ASSERT (!current->m_disposed);
1.175 + current->DoDispose ();
1.176 + current->m_disposed = true;
1.177 + }
1.178 +}
1.179 +void
1.180 +Object::UpdateSortedArray (struct Aggregates *aggregates, uint32_t j) const
1.181 +{
1.182 + while (j > 0 &&
1.183 + aggregates->buffer[j]->m_getObjectCount > aggregates->buffer[j-1]->m_getObjectCount)
1.184 + {
1.185 + Object *tmp = aggregates->buffer[j-1];
1.186 + aggregates->buffer[j-1] = aggregates->buffer[j];
1.187 + aggregates->buffer[j] = tmp;
1.188 + j--;
1.189 + }
1.190 }
1.191 void
1.192 Object::AggregateObject (Ptr<Object> o)
1.193 @@ -171,20 +205,38 @@
1.194 }
1.195
1.196 Object *other = PeekPointer (o);
1.197 - Object *next = m_next;
1.198 - m_next = other->m_next;
1.199 - other->m_next = next;
1.200 - NS_ASSERT (CheckLoose ());
1.201 - NS_ASSERT (o->CheckLoose ());
1.202 - // call NotifyNewAggregate in the listed chain
1.203 - Object *currentObject = this;
1.204 - do
1.205 + // first create the new aggregate buffer.
1.206 + uint32_t total = m_aggregates->n + other->m_aggregates->n;
1.207 + struct Aggregates *aggregates =
1.208 + (struct Aggregates *)malloc (sizeof(struct Aggregates)+(total-1)*sizeof(Object*));
1.209 + aggregates->n = total;
1.210 + memcpy (&aggregates->buffer[0],
1.211 + &m_aggregates->buffer[0],
1.212 + m_aggregates->n*sizeof(Object*));
1.213 + // append the other aggregates in the new buffer
1.214 + for (uint32_t i = 0; i < other->m_aggregates->n; i++)
1.215 {
1.216 - // the NotifyNewAggregate of the current object implementation
1.217 - // should be called on the next object in the linked chain
1.218 - currentObject->NotifyNewAggregate ();
1.219 - currentObject = currentObject->m_next;
1.220 - } while (currentObject != this);
1.221 + aggregates->buffer[m_aggregates->n+i] = other->m_aggregates->buffer[i];
1.222 + UpdateSortedArray (aggregates, m_aggregates->n + i);
1.223 + }
1.224 +
1.225 + // free both aggregate buffers
1.226 + free (m_aggregates);
1.227 + free (other->m_aggregates);
1.228 +
1.229 + // Then, assign that buffer to every object
1.230 + uint32_t n = aggregates->n;
1.231 + for (uint32_t i = 0; i < n; i++)
1.232 + {
1.233 + Object *current = aggregates->buffer[i];
1.234 + current->m_aggregates = aggregates;
1.235 + }
1.236 + // Finally, call NotifyNewAggregate in the listed chain
1.237 + for (uint32_t i = 0; i < n; i++)
1.238 + {
1.239 + Object *current = m_aggregates->buffer[i];
1.240 + current->NotifyNewAggregate ();
1.241 + }
1.242 }
1.243 /**
1.244 * This function must be implemented in the stack that needs to notify
1.245 @@ -233,14 +285,12 @@
1.246 Object::CheckLoose (void) const
1.247 {
1.248 uint32_t refcount = 0;
1.249 - const Object *current = this;
1.250 - do
1.251 + uint32_t n = m_aggregates->n;
1.252 + for (uint32_t i = 0; i < n; i++)
1.253 {
1.254 + Object *current = m_aggregates->buffer[i];
1.255 refcount += current->m_count;
1.256 - current = current->m_next;
1.257 }
1.258 - while (current != this);
1.259 -
1.260 return (refcount > 0);
1.261 }
1.262
1.263 @@ -249,38 +299,38 @@
1.264 {
1.265 // First, check if any of the attached
1.266 // Object has a non-zero count.
1.267 - const Object *current = this;
1.268 - do {
1.269 - NS_ASSERT (current != 0);
1.270 - if (current->m_count != 0)
1.271 - {
1.272 - return;
1.273 - }
1.274 - current = current->m_next;
1.275 - } while (current != this);
1.276 + uint32_t n = m_aggregates->n;
1.277 + for (uint32_t i = 0; i < n; i++)
1.278 + {
1.279 + Object *current = m_aggregates->buffer[i];
1.280 + if (current->m_count != 0)
1.281 + {
1.282 + return;
1.283 + }
1.284 + }
1.285
1.286 // Ensure we are disposed.
1.287 - Object *tmp = const_cast<Object *> (this);
1.288 - const Object *end = this;
1.289 - do {
1.290 - NS_ASSERT (current != 0);
1.291 - Object *next = tmp->m_next;
1.292 - if (!tmp->m_disposed)
1.293 - {
1.294 - tmp->DoDispose ();
1.295 - }
1.296 - tmp = next;
1.297 - } while (tmp != end);
1.298 + for (uint32_t i = 0; i < n; i++)
1.299 + {
1.300 + Object *current = m_aggregates->buffer[i];
1.301 + if (!current->m_disposed)
1.302 + {
1.303 + current->DoDispose ();
1.304 + }
1.305 + }
1.306
1.307 // all attached objects have a zero count so,
1.308 - // we can delete all attached objects.
1.309 - current = this;
1.310 - do {
1.311 - NS_ASSERT (current != 0);
1.312 - Object *next = current->m_next;
1.313 - delete current;
1.314 - current = next;
1.315 - } while (current != end);
1.316 + // we can delete them all.
1.317 + struct Aggregates *aggregates = m_aggregates;
1.318 + for (uint32_t i = 0; i < n; i++)
1.319 + {
1.320 + // There is a trick here: each time we call delete below,
1.321 + // the deleted object is removed from the aggregate buffer
1.322 + // in the destructor so, the index of the next element to
1.323 + // lookup is always zero
1.324 + Object *current = aggregates->buffer[0];
1.325 + delete current;
1.326 + }
1.327 }
1.328 } // namespace ns3
1.329
2.1 --- a/src/core/object.h Wed Nov 04 22:08:21 2009 +0100
2.2 +++ b/src/core/object.h Thu Nov 05 21:04:05 2009 +0100
2.3 @@ -85,9 +85,9 @@
2.4 Ptr<const Object> Next (void);
2.5 private:
2.6 friend class Object;
2.7 - AggregateIterator (Ptr<const Object> first);
2.8 - Ptr<const Object> m_first;
2.9 - Ptr<const Object> m_current;
2.10 + AggregateIterator (Ptr<const Object> object);
2.11 + Ptr<const Object> m_object;
2.12 + uint32_t m_current;
2.13 };
2.14
2.15 Object ();
2.16 @@ -215,6 +215,21 @@
2.17 friend class ObjectFactory;
2.18 friend class AggregateIterator;
2.19
2.20 + /**
2.21 + * This data structure uses a classic C-style trick to
2.22 + * hold an array of variable size without performing
2.23 + * two memory allocations: the declaration of the structure
2.24 + * declares a one-element array but when we allocate
2.25 + * memory for this struct, we effectively allocate a larger
2.26 + * chunk of memory than the struct to allow space for a larger
2.27 + * variable sized buffer whose size is indicated by the element
2.28 + * 'n'
2.29 + */
2.30 + struct Aggregates {
2.31 + uint32_t n;
2.32 + Object *buffer[1];
2.33 + };
2.34 +
2.35 Ptr<Object> DoGetObject (TypeId tid) const;
2.36 bool Check (void) const;
2.37 bool CheckLoose (void) const;
2.38 @@ -243,6 +258,8 @@
2.39 */
2.40 void Construct (const AttributeList &attributes);
2.41
2.42 + void UpdateSortedArray (struct Aggregates *aggregates, uint32_t i) const;
2.43 +
2.44 /**
2.45 * The reference count for this object. Each aggregate
2.46 * has an individual reference count. When the global
2.47 @@ -261,13 +278,19 @@
2.48 */
2.49 bool m_disposed;
2.50 /**
2.51 - * A pointer to the next aggregate object. This is a circular
2.52 - * linked list of aggregated objects: the last one points
2.53 - * back to the first one. If an object is not aggregated to
2.54 - * any other object, the value of this field is equal to the
2.55 - * value of the 'this' pointer.
2.56 + * a pointer to an array of 'aggregates'. i.e., a pointer to
2.57 + * each object aggregated to this object is stored in this
2.58 + * array. The array is shared by all aggregated objects
2.59 + * so the size of the array is indirectly a reference count.
2.60 */
2.61 - Object *m_next;
2.62 + struct Aggregates * m_aggregates;
2.63 + /**
2.64 + * Indicates the number of times the object was accessed with a
2.65 + * call to GetObject. This integer is used to implement a
2.66 + * heuristic to sort the array of aggregates to put at the start
2.67 + * of the array the most-frequently accessed elements.
2.68 + */
2.69 + uint32_t m_getObjectCount;
2.70 };
2.71
2.72 /**