Optimize Object::GetObject. Introduce an array of aggregates and sort is by access frequency.
authorMathieu Lacage <mathieu.lacage@sophia.inria.fr>
Thu Nov 05 21:04:05 2009 +0100 (3 months ago)
changeset 5529b1f7a3a87887
parent 5524 c409fe8019c9
child 5530 83baafea199f
Optimize Object::GetObject. Introduce an array of aggregates and sort is by access frequency.
src/core/object.cc
src/core/object.h
     1.1 --- a/src/core/object.cc	Wed Nov 04 22:08:21 2009 +0100
     1.2 +++ b/src/core/object.cc	Thu Nov 05 21:04:05 2009 +0100
     1.3 @@ -28,6 +28,8 @@
     1.4  #include "string.h"
     1.5  #include <vector>
     1.6  #include <sstream>
     1.7 +#include <stdlib.h>
     1.8 +#include <string.h>
     1.9  
    1.10  NS_LOG_COMPONENT_DEFINE ("Object");
    1.11  
    1.12 @@ -40,28 +42,23 @@
    1.13  NS_OBJECT_ENSURE_REGISTERED (Object);
    1.14  
    1.15  Object::AggregateIterator::AggregateIterator ()
    1.16 -  : m_first (0),
    1.17 +  : m_object (0),
    1.18      m_current (0)
    1.19  {}
    1.20  
    1.21  bool 
    1.22  Object::AggregateIterator::HasNext (void) const
    1.23  {
    1.24 -  if (m_current != 0 && m_current->m_next != PeekPointer (m_first))
    1.25 -    {
    1.26 -      return true;
    1.27 -    }
    1.28 -  return false;
    1.29 +  return m_current < m_object->m_aggregates->n;
    1.30  }
    1.31  Ptr<const Object> 
    1.32  Object::AggregateIterator::Next (void)
    1.33  {
    1.34 -  m_current = m_current->m_next;
    1.35 -  return m_current;
    1.36 +  return m_object->m_aggregates->buffer[m_current];
    1.37  }
    1.38 -Object::AggregateIterator::AggregateIterator (Ptr<const Object> first)
    1.39 -  : m_first (first),
    1.40 -    m_current (first)
    1.41 +Object::AggregateIterator::AggregateIterator (Ptr<const Object> object)
    1.42 +  : m_object (object),
    1.43 +    m_current (0)
    1.44  {}
    1.45  
    1.46  
    1.47 @@ -85,18 +82,45 @@
    1.48    : m_count (1),
    1.49      m_tid (Object::GetTypeId ()),
    1.50      m_disposed (false),
    1.51 -    m_next (this)
    1.52 -{}
    1.53 +    m_aggregates ((struct Aggregates *)malloc (sizeof (struct Aggregates))),
    1.54 +    m_getObjectCount (0)
    1.55 +{
    1.56 +  m_aggregates->n = 1;
    1.57 +  m_aggregates->buffer[0] = this;
    1.58 +}
    1.59  Object::~Object () 
    1.60  {
    1.61 -  m_next = 0;
    1.62 +  // remove this object from the aggregate list
    1.63 +  uint32_t n = m_aggregates->n;
    1.64 +  for (uint32_t i = 0; i < n; i++)
    1.65 +    {
    1.66 +      Object *current = m_aggregates->buffer[i];
    1.67 +      if (current == this)
    1.68 +        {
    1.69 +          memmove (&m_aggregates->buffer[i], 
    1.70 +                   &m_aggregates->buffer[i+1],
    1.71 +                   sizeof (Object *)*(m_aggregates->n - (i+1)));
    1.72 +          m_aggregates->n--;
    1.73 +        }
    1.74 +    }
    1.75 +  // finally, if all objects have been removed from the list,
    1.76 +  // delete the aggregate list
    1.77 +  if (m_aggregates->n == 0)
    1.78 +    {
    1.79 +      free (m_aggregates);
    1.80 +    }
    1.81 +  m_aggregates = 0;
    1.82  }
    1.83  Object::Object (const Object &o)
    1.84    : m_count (1),
    1.85      m_tid (o.m_tid),
    1.86      m_disposed (false),
    1.87 -    m_next (this)
    1.88 -{}
    1.89 +    m_aggregates ((struct Aggregates *)malloc (sizeof (struct Aggregates))),
    1.90 +    m_getObjectCount (0)
    1.91 +{
    1.92 +  m_aggregates->n = 1;
    1.93 +  m_aggregates->buffer[0] = this;
    1.94 +}
    1.95  uint32_t
    1.96  Object::GetReferenceCount (void) const
    1.97  {
    1.98 @@ -112,48 +136,58 @@
    1.99  Object::DoGetObject (TypeId tid) const
   1.100  {
   1.101    NS_ASSERT (CheckLoose ());
   1.102 -  const Object *currentObject = this;
   1.103 -  const Object *prevObject = 0;
   1.104 +
   1.105 +  uint32_t n = m_aggregates->n;
   1.106    TypeId objectTid = Object::GetTypeId ();
   1.107 -  do {
   1.108 -    NS_ASSERT (currentObject != 0);
   1.109 -    TypeId cur = currentObject->GetInstanceTypeId ();
   1.110 -    while (cur != tid && cur != objectTid)
   1.111 -      {
   1.112 -        cur = cur.GetParent ();
   1.113 -      }
   1.114 -    if (cur == tid)
   1.115 -      {
   1.116 -        if (prevObject != 0)
   1.117 -          {
   1.118 -            // This is an attempt to 'cache' the result of this lookup.
   1.119 -            // the idea is that if we perform a lookup for a TypdId on this object,
   1.120 -            // we are likely to perform the same lookup later so, we re-order
   1.121 -            // the circular linked-list of objects here by putting the object we 
   1.122 -            // just found at the head of the list. This optimization is
   1.123 -            // _extremely_ effective in general.
   1.124 -            const_cast<Object*>(prevObject)->m_next = currentObject->m_next;
   1.125 -            const_cast<Object*>(currentObject)->m_next = m_next;
   1.126 -            const_cast<Object*>(this)->m_next = (Object*)currentObject;
   1.127 -          }
   1.128 -        return const_cast<Object *> (currentObject);
   1.129 -      }
   1.130 -    prevObject = currentObject;
   1.131 -    currentObject = currentObject->m_next;
   1.132 -  } while (currentObject != this);
   1.133 +  for (uint32_t i = 0; i < n; i++)
   1.134 +    {
   1.135 +      Object *current = m_aggregates->buffer[i];
   1.136 +      TypeId cur = current->GetInstanceTypeId ();
   1.137 +      while (cur != tid && cur != objectTid)
   1.138 +        {
   1.139 +          cur = cur.GetParent ();
   1.140 +        }
   1.141 +      if (cur == tid)
   1.142 +        {
   1.143 +          // This is an attempt to 'cache' the result of this lookup.
   1.144 +          // the idea is that if we perform a lookup for a TypeId on this object,
   1.145 +          // we are likely to perform the same lookup later so, we make sure
   1.146 +          // that the aggregate array is sorted by the number of accesses
   1.147 +          // to each object.
   1.148 +
   1.149 +          // first, increment the access count
   1.150 +          current->m_getObjectCount++;
   1.151 +          // then, update the sort
   1.152 +          UpdateSortedArray (m_aggregates, i);
   1.153 +          // finally, return the match
   1.154 +          return const_cast<Object *> (current);
   1.155 +        }
   1.156 +    }
   1.157    return 0;
   1.158  }
   1.159  void 
   1.160  Object::Dispose (void)
   1.161  {
   1.162 -  Object *current = this;
   1.163 -  do {
   1.164 -    NS_ASSERT (current != 0);
   1.165 -    NS_ASSERT (!current->m_disposed);
   1.166 -    current->DoDispose ();
   1.167 -    current->m_disposed = true;
   1.168 -    current = current->m_next;
   1.169 -  } while (current != this);
   1.170 +  uint32_t n = m_aggregates->n;
   1.171 +  for (uint32_t i = 0; i < n; i++)
   1.172 +    {
   1.173 +      Object *current = m_aggregates->buffer[i];
   1.174 +      NS_ASSERT (!current->m_disposed);
   1.175 +      current->DoDispose ();
   1.176 +      current->m_disposed = true;
   1.177 +    }
   1.178 +}
   1.179 +void
   1.180 +Object::UpdateSortedArray (struct Aggregates *aggregates, uint32_t j) const
   1.181 +{
   1.182 +  while (j > 0 && 
   1.183 +         aggregates->buffer[j]->m_getObjectCount > aggregates->buffer[j-1]->m_getObjectCount)
   1.184 +    {
   1.185 +      Object *tmp = aggregates->buffer[j-1];
   1.186 +      aggregates->buffer[j-1] = aggregates->buffer[j];
   1.187 +      aggregates->buffer[j] = tmp;
   1.188 +      j--;
   1.189 +    }
   1.190  }
   1.191  void 
   1.192  Object::AggregateObject (Ptr<Object> o)
   1.193 @@ -171,20 +205,38 @@
   1.194      }
   1.195  
   1.196    Object *other = PeekPointer (o);
   1.197 -  Object *next = m_next;
   1.198 -  m_next = other->m_next;
   1.199 -  other->m_next = next;
   1.200 -  NS_ASSERT (CheckLoose ());
   1.201 -  NS_ASSERT (o->CheckLoose ());
   1.202 -  // call NotifyNewAggregate in the listed chain
   1.203 -  Object *currentObject = this;
   1.204 -  do 
   1.205 +  // first create the new aggregate buffer.
   1.206 +  uint32_t total = m_aggregates->n + other->m_aggregates->n;
   1.207 +  struct Aggregates *aggregates = 
   1.208 +    (struct Aggregates *)malloc (sizeof(struct Aggregates)+(total-1)*sizeof(Object*));
   1.209 +  aggregates->n = total;
   1.210 +  memcpy (&aggregates->buffer[0], 
   1.211 +          &m_aggregates->buffer[0], 
   1.212 +          m_aggregates->n*sizeof(Object*));
   1.213 +  // append the other aggregates in the new buffer
   1.214 +  for (uint32_t i = 0; i < other->m_aggregates->n; i++)
   1.215      {
   1.216 -      // the NotifyNewAggregate of the current object implementation
   1.217 -      // should be called on the next object in the linked chain
   1.218 -      currentObject->NotifyNewAggregate ();
   1.219 -      currentObject = currentObject->m_next;
   1.220 -    } while (currentObject != this);
   1.221 +      aggregates->buffer[m_aggregates->n+i] = other->m_aggregates->buffer[i];
   1.222 +      UpdateSortedArray (aggregates, m_aggregates->n + i);
   1.223 +    }
   1.224 +
   1.225 +  // free both aggregate buffers
   1.226 +  free (m_aggregates);
   1.227 +  free (other->m_aggregates);
   1.228 +
   1.229 +  // Then, assign that buffer to every object
   1.230 +  uint32_t n = aggregates->n;
   1.231 +  for (uint32_t i = 0; i < n; i++)
   1.232 +    {
   1.233 +      Object *current = aggregates->buffer[i];
   1.234 +      current->m_aggregates = aggregates;
   1.235 +    }
   1.236 +  // Finally, call NotifyNewAggregate in the listed chain
   1.237 +  for (uint32_t i = 0; i < n; i++)
   1.238 +    {
   1.239 +      Object *current = m_aggregates->buffer[i];
   1.240 +      current->NotifyNewAggregate ();
   1.241 +    }
   1.242  }
   1.243  /**
   1.244   * This function must be implemented in the stack that needs to notify
   1.245 @@ -233,14 +285,12 @@
   1.246  Object::CheckLoose (void) const
   1.247  {
   1.248    uint32_t refcount = 0;
   1.249 -  const Object *current = this;
   1.250 -  do
   1.251 +  uint32_t n = m_aggregates->n;
   1.252 +  for (uint32_t i = 0; i < n; i++)
   1.253      {
   1.254 +      Object *current = m_aggregates->buffer[i];
   1.255        refcount += current->m_count;
   1.256 -      current = current->m_next;
   1.257      }
   1.258 -  while (current != this);
   1.259 -
   1.260    return (refcount > 0);
   1.261  }
   1.262  
   1.263 @@ -249,38 +299,38 @@
   1.264  {
   1.265    // First, check if any of the attached
   1.266    // Object has a non-zero count.
   1.267 -  const Object *current = this;
   1.268 -  do {
   1.269 -    NS_ASSERT (current != 0);
   1.270 -    if (current->m_count != 0)
   1.271 -      {
   1.272 -        return;
   1.273 -      }
   1.274 -    current = current->m_next;
   1.275 -  } while (current != this);
   1.276 +  uint32_t n = m_aggregates->n;
   1.277 +  for (uint32_t i = 0; i < n; i++)
   1.278 +    {
   1.279 +      Object *current = m_aggregates->buffer[i];
   1.280 +      if (current->m_count != 0)
   1.281 +        {
   1.282 +          return;
   1.283 +        }
   1.284 +    }
   1.285  
   1.286    // Ensure we are disposed.
   1.287 -  Object *tmp = const_cast<Object *> (this);
   1.288 -  const Object *end = this;
   1.289 -  do {
   1.290 -    NS_ASSERT (current != 0);
   1.291 -    Object *next = tmp->m_next;
   1.292 -    if (!tmp->m_disposed)
   1.293 -      {
   1.294 -        tmp->DoDispose ();
   1.295 -      }
   1.296 -    tmp = next;
   1.297 -  } while (tmp != end);
   1.298 +  for (uint32_t i = 0; i < n; i++)
   1.299 +    {
   1.300 +      Object *current = m_aggregates->buffer[i];
   1.301 +      if (!current->m_disposed)
   1.302 +        {
   1.303 +          current->DoDispose ();
   1.304 +        }
   1.305 +    }
   1.306  
   1.307    // all attached objects have a zero count so, 
   1.308 -  // we can delete all attached objects.
   1.309 -  current = this;
   1.310 -  do {
   1.311 -    NS_ASSERT (current != 0);
   1.312 -    Object *next = current->m_next;
   1.313 -    delete current;
   1.314 -    current = next;
   1.315 -  } while (current != end);
   1.316 +  // we can delete them all.
   1.317 +  struct Aggregates *aggregates = m_aggregates;
   1.318 +  for (uint32_t i = 0; i < n; i++)
   1.319 +    {
   1.320 +      // There is a trick here: each time we call delete below,
   1.321 +      // the deleted object is removed from the aggregate buffer
   1.322 +      // in the destructor so, the index of the next element to 
   1.323 +      // lookup is always zero
   1.324 +      Object *current = aggregates->buffer[0];
   1.325 +      delete current;
   1.326 +    }
   1.327  }
   1.328  } // namespace ns3
   1.329  
     2.1 --- a/src/core/object.h	Wed Nov 04 22:08:21 2009 +0100
     2.2 +++ b/src/core/object.h	Thu Nov 05 21:04:05 2009 +0100
     2.3 @@ -85,9 +85,9 @@
     2.4      Ptr<const Object> Next (void);
     2.5    private:
     2.6      friend class Object;
     2.7 -    AggregateIterator (Ptr<const Object> first);
     2.8 -    Ptr<const Object> m_first;
     2.9 -    Ptr<const Object> m_current;
    2.10 +    AggregateIterator (Ptr<const Object> object);
    2.11 +    Ptr<const Object> m_object;
    2.12 +    uint32_t m_current;
    2.13    };
    2.14  
    2.15    Object ();
    2.16 @@ -215,6 +215,21 @@
    2.17    friend class ObjectFactory;
    2.18    friend class AggregateIterator;
    2.19  
    2.20 +  /**
    2.21 +   * This data structure uses a classic C-style trick to 
    2.22 +   * hold an array of variable size without performing
    2.23 +   * two memory allocations: the declaration of the structure
    2.24 +   * declares a one-element array but when we allocate
    2.25 +   * memory for this struct, we effectively allocate a larger
    2.26 +   * chunk of memory than the struct to allow space for a larger
    2.27 +   * variable sized buffer whose size is indicated by the element
    2.28 +   * 'n'
    2.29 +   */
    2.30 +  struct Aggregates {
    2.31 +    uint32_t n;
    2.32 +    Object *buffer[1];
    2.33 +  };
    2.34 +
    2.35    Ptr<Object> DoGetObject (TypeId tid) const;
    2.36    bool Check (void) const;
    2.37    bool CheckLoose (void) const;
    2.38 @@ -243,6 +258,8 @@
    2.39     */
    2.40    void Construct (const AttributeList &attributes);
    2.41  
    2.42 +  void UpdateSortedArray (struct Aggregates *aggregates, uint32_t i) const;
    2.43 +
    2.44    /**
    2.45     * The reference count for this object. Each aggregate
    2.46     * has an individual reference count. When the global
    2.47 @@ -261,13 +278,19 @@
    2.48     */
    2.49    bool m_disposed;
    2.50    /**
    2.51 -   * A pointer to the next aggregate object. This is a circular
    2.52 -   * linked list of aggregated objects: the last one points
    2.53 -   * back to the first one. If an object is not aggregated to
    2.54 -   * any other object, the value of this field is equal to the
    2.55 -   * value of the 'this' pointer.
    2.56 +   * a pointer to an array of 'aggregates'. i.e., a pointer to
    2.57 +   * each object aggregated to this object is stored in this 
    2.58 +   * array. The array is shared by all aggregated objects
    2.59 +   * so the size of the array is indirectly a reference count.
    2.60     */
    2.61 -  Object *m_next;
    2.62 +  struct Aggregates * m_aggregates;
    2.63 +  /**
    2.64 +   * Indicates the number of times the object was accessed with a
    2.65 +   * call to GetObject. This integer is used to implement a
    2.66 +   * heuristic to sort the array of aggregates to put at the start
    2.67 +   * of the array the most-frequently accessed elements.
    2.68 +   */
    2.69 +  uint32_t m_getObjectCount;
    2.70  };
    2.71  
    2.72  /**