parallel_reduce.h

00001 /*
00002     Copyright 2005-2011 Intel Corporation.  All Rights Reserved.
00003 
00004     The source code contained or described herein and all documents related
00005     to the source code ("Material") are owned by Intel Corporation or its
00006     suppliers or licensors.  Title to the Material remains with Intel
00007     Corporation or its suppliers and licensors.  The Material is protected
00008     by worldwide copyright laws and treaty provisions.  No part of the
00009     Material may be used, copied, reproduced, modified, published, uploaded,
00010     posted, transmitted, distributed, or disclosed in any way without
00011     Intel's prior express written permission.
00012 
00013     No license under any patent, copyright, trade secret or other
00014     intellectual property right is granted to or conferred upon you by
00015     disclosure or delivery of the Materials, either expressly, by
00016     implication, inducement, estoppel or otherwise.  Any license under such
00017     intellectual property rights must be express and approved by Intel in
00018     writing.
00019 */
00020 
00021 #ifndef __TBB_parallel_reduce_H
00022 #define __TBB_parallel_reduce_H
00023 
00024 #include "task.h"
00025 #include "aligned_space.h"
00026 #include "partitioner.h"
00027 #include "tbb_profiling.h"
00028 #include <new>
00029 
00030 namespace tbb {
00031 
00033 namespace internal {
00035 
00036     typedef char reduction_context;
00037 
00039 
00040     template<typename Body>
00041     class finish_reduce: public task {
00043         Body* my_body;
00044         bool has_right_zombie;
00045         const reduction_context my_context;
00046         aligned_space<Body,1> zombie_space;
00047         finish_reduce( reduction_context context_ ) : 
00048             my_body(NULL),
00049             has_right_zombie(false),
00050             my_context(context_)
00051         {
00052         }
00053         task* execute() {
00054             if( has_right_zombie ) {
00055                 // Right child was stolen.
00056                 Body* s = zombie_space.begin();
00057                 my_body->join( *s );
00058                 s->~Body();
00059             }
00060             if( my_context==1 )  // left child
00061                 itt_store_word_with_release( static_cast<finish_reduce*>(parent())->my_body, my_body );
00062             return NULL;
00063         }
00064         template<typename Range,typename Body_, typename Partitioner>
00065         friend class start_reduce;
00066     };
00067 
00069 
00070     template<typename Range, typename Body, typename Partitioner>
00071     class start_reduce: public task {
00072         typedef finish_reduce<Body> finish_type;
00073         Body* my_body;
00074         Range my_range;
00075         typename Partitioner::partition_type my_partition;
00076         reduction_context my_context;
00077         /*override*/ task* execute();
00078         template<typename Body_>
00079         friend class finish_reduce;
00080     
00082         start_reduce( const Range& range, Body* body, Partitioner& partitioner ) :
00083             my_body(body),
00084             my_range(range),
00085             my_partition(partitioner),
00086             my_context(0)
00087         {
00088         }
00090 
00091         start_reduce( start_reduce& parent_, split ) :
00092             my_body(parent_.my_body),
00093             my_range(parent_.my_range,split()),
00094             my_partition(parent_.my_partition,split()),
00095             my_context(2)
00096         {
00097             my_partition.set_affinity(*this);
00098             parent_.my_context = 1;
00099         }
00101         /*override*/ void note_affinity( affinity_id id ) {
00102             my_partition.note_affinity( id );
00103         }
00104 
00105 public:
00106         static void run( const Range& range, Body& body, Partitioner& partitioner ) {
00107             if( !range.empty() ) {
00108 #if !__TBB_TASK_GROUP_CONTEXT || TBB_JOIN_OUTER_TASK_GROUP
00109                 task::spawn_root_and_wait( *new(task::allocate_root()) start_reduce(range,&body,partitioner) );
00110 #else
00111                 // Bound context prevents exceptions from body to affect nesting or sibling algorithms,
00112                 // and allows users to handle exceptions safely by wrapping parallel_for in the try-block.
00113                 task_group_context context;
00114                 task::spawn_root_and_wait( *new(task::allocate_root(context)) start_reduce(range,&body,partitioner) );
00115 #endif /* __TBB_TASK_GROUP_CONTEXT && !TBB_JOIN_OUTER_TASK_GROUP */
00116             }
00117         }
00118 #if __TBB_TASK_GROUP_CONTEXT
00119         static void run( const Range& range, Body& body, Partitioner& partitioner, task_group_context& context ) {
00120             if( !range.empty() ) 
00121                 task::spawn_root_and_wait( *new(task::allocate_root(context)) start_reduce(range,&body,partitioner) );
00122         }
00123 #endif /* __TBB_TASK_GROUP_CONTEXT */
00124     };
00125 
00126     template<typename Range, typename Body, typename Partitioner>
00127     task* start_reduce<Range,Body,Partitioner>::execute() {
00128         if( my_context==2 ) { // right child
00129             finish_type* p = static_cast<finish_type*>(parent());
00130             if( !itt_load_word_with_acquire(p->my_body) ) {
00131                 my_body = new( p->zombie_space.begin() ) Body(*my_body,split());
00132                 p->has_right_zombie = true;
00133             }
00134         }
00135         if( !my_range.is_divisible() || my_partition.should_execute_range(*this) ) {
00136             (*my_body)( my_range );
00137             if( my_context==1 ) 
00138                 itt_store_word_with_release(static_cast<finish_type*>(parent())->my_body, my_body );
00139             return my_partition.continue_after_execute_range();
00140         } else {
00141             finish_type& c = *new( allocate_continuation()) finish_type(my_context);
00142             recycle_as_child_of(c);
00143             c.set_ref_count(2);
00144             bool delay = my_partition.decide_whether_to_delay();
00145             start_reduce& b = *new( c.allocate_child() ) start_reduce(*this,split());
00146             my_partition.spawn_or_delay(delay,b);
00147             return this;
00148         }
00149     }
00150 
00151 #if TBB_PREVIEW_DETERMINISTIC_REDUCE
00153 
00154     template<typename Body>
00155     class finish_deterministic_reduce: public task {
00156         Body &my_left_body;
00157         Body my_right_body;
00158 
00159         finish_deterministic_reduce( Body &body ) :
00160             my_left_body( body ),
00161             my_right_body( body, split() )
00162         {
00163         }
00164         task* execute() {
00165             my_left_body.join( my_right_body );
00166             return NULL;
00167         }
00168         template<typename Range,typename Body_>
00169         friend class start_deterministic_reduce;
00170     };
00171 
00173 
00174     template<typename Range, typename Body>
00175     class start_deterministic_reduce: public task {
00176         typedef finish_deterministic_reduce<Body> finish_type;
00177         Body &my_body;
00178         Range my_range;
00179         /*override*/ task* execute();
00180 
00182         start_deterministic_reduce( const Range& range, Body& body ) :
00183             my_body( body ),
00184             my_range( range )
00185         {
00186         }
00188 
00189         start_deterministic_reduce( start_deterministic_reduce& parent_, finish_type& c ) :
00190             my_body( c.my_right_body ),
00191             my_range( parent_.my_range, split() )
00192         {
00193         }
00194 
00195 public:
00196         static void run( const Range& range, Body& body ) {
00197             if( !range.empty() ) {
00198 #if !__TBB_TASK_GROUP_CONTEXT || TBB_JOIN_OUTER_TASK_GROUP
00199                 task::spawn_root_and_wait( *new(task::allocate_root()) start_deterministic_reduce(range,&body) );
00200 #else
00201                 // Bound context prevents exceptions from body to affect nesting or sibling algorithms,
00202                 // and allows users to handle exceptions safely by wrapping parallel_for in the try-block.
00203                 task_group_context context;
00204                 task::spawn_root_and_wait( *new(task::allocate_root(context)) start_deterministic_reduce(range,body) );
00205 #endif /* __TBB_TASK_GROUP_CONTEXT && !TBB_JOIN_OUTER_TASK_GROUP */
00206             }
00207         }
00208 #if __TBB_TASK_GROUP_CONTEXT
00209         static void run( const Range& range, Body& body, task_group_context& context ) {
00210             if( !range.empty() ) 
00211                 task::spawn_root_and_wait( *new(task::allocate_root(context)) start_deterministic_reduce(range,body) );
00212         }
00213 #endif /* __TBB_TASK_GROUP_CONTEXT */
00214     };
00215 
00216     template<typename Range, typename Body>
00217     task* start_deterministic_reduce<Range,Body>::execute() {
00218         if( !my_range.is_divisible() ) {
00219             my_body( my_range );
00220             return NULL;
00221         } else {
00222             finish_type& c = *new( allocate_continuation() ) finish_type( my_body );
00223             recycle_as_child_of(c);
00224             c.set_ref_count(2);
00225             start_deterministic_reduce& b = *new( c.allocate_child() ) start_deterministic_reduce( *this, c );
00226             task::spawn(b);
00227             return this;
00228         }
00229     }
00230 #endif /* TBB_PREVIEW_DETERMINISTIC_REDUCE */
00231 
00233 
00237     template<typename Range, typename Value, typename RealBody, typename Reduction>
00238     class lambda_reduce_body {
00239 
00240 //FIXME: decide if my_real_body, my_reduction, and identity_element should be copied or referenced
00241 //       (might require some performance measurements)
00242 
00243         const Value&     identity_element;
00244         const RealBody&  my_real_body;
00245         const Reduction& my_reduction;
00246         Value            my_value;
00247         lambda_reduce_body& operator= ( const lambda_reduce_body& other );
00248     public:
00249         lambda_reduce_body( const Value& identity, const RealBody& body, const Reduction& reduction )
00250             : identity_element(identity)
00251             , my_real_body(body)
00252             , my_reduction(reduction)
00253             , my_value(identity)
00254         { }
00255         lambda_reduce_body( const lambda_reduce_body& other )
00256             : identity_element(other.identity_element)
00257             , my_real_body(other.my_real_body)
00258             , my_reduction(other.my_reduction)
00259             , my_value(other.my_value)
00260         { }
00261         lambda_reduce_body( lambda_reduce_body& other, tbb::split )
00262             : identity_element(other.identity_element)
00263             , my_real_body(other.my_real_body)
00264             , my_reduction(other.my_reduction)
00265             , my_value(other.identity_element)
00266         { }
00267         void operator()(Range& range) {
00268             my_value = my_real_body(range, const_cast<const Value&>(my_value));
00269         }
00270         void join( lambda_reduce_body& rhs ) {
00271             my_value = my_reduction(const_cast<const Value&>(my_value), const_cast<const Value&>(rhs.my_value));
00272         }
00273         Value result() const {
00274             return my_value;
00275         }
00276     };
00277 
00278 } // namespace internal
00280 
00281 // Requirements on Range concept are documented in blocked_range.h
00282 
00301 
00303 
00304 template<typename Range, typename Body>
00305 void parallel_reduce( const Range& range, Body& body ) {
00306     internal::start_reduce<Range,Body, const __TBB_DEFAULT_PARTITIONER>::run( range, body, __TBB_DEFAULT_PARTITIONER() );
00307 }
00308 
00310 
00311 template<typename Range, typename Body>
00312 void parallel_reduce( const Range& range, Body& body, const simple_partitioner& partitioner ) {
00313     internal::start_reduce<Range,Body,const simple_partitioner>::run( range, body, partitioner );
00314 }
00315 
00317 
00318 template<typename Range, typename Body>
00319 void parallel_reduce( const Range& range, Body& body, const auto_partitioner& partitioner ) {
00320     internal::start_reduce<Range,Body,const auto_partitioner>::run( range, body, partitioner );
00321 }
00322 
00324 
00325 template<typename Range, typename Body>
00326 void parallel_reduce( const Range& range, Body& body, affinity_partitioner& partitioner ) {
00327     internal::start_reduce<Range,Body,affinity_partitioner>::run( range, body, partitioner );
00328 }
00329 
00330 #if __TBB_TASK_GROUP_CONTEXT
00332 
00333 template<typename Range, typename Body>
00334 void parallel_reduce( const Range& range, Body& body, const simple_partitioner& partitioner, task_group_context& context ) {
00335     internal::start_reduce<Range,Body,const simple_partitioner>::run( range, body, partitioner, context );
00336 }
00337 
00339 
00340 template<typename Range, typename Body>
00341 void parallel_reduce( const Range& range, Body& body, const auto_partitioner& partitioner, task_group_context& context ) {
00342     internal::start_reduce<Range,Body,const auto_partitioner>::run( range, body, partitioner, context );
00343 }
00344 
00346 
00347 template<typename Range, typename Body>
00348 void parallel_reduce( const Range& range, Body& body, affinity_partitioner& partitioner, task_group_context& context ) {
00349     internal::start_reduce<Range,Body,affinity_partitioner>::run( range, body, partitioner, context );
00350 }
00351 #endif /* __TBB_TASK_GROUP_CONTEXT */
00352 
00356 
00357 
00358 template<typename Range, typename Value, typename RealBody, typename Reduction>
00359 Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction ) {
00360     internal::lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
00361     internal::start_reduce<Range,internal::lambda_reduce_body<Range,Value,RealBody,Reduction>,const __TBB_DEFAULT_PARTITIONER>
00362                           ::run(range, body, __TBB_DEFAULT_PARTITIONER() );
00363     return body.result();
00364 }
00365 
00367 
00368 template<typename Range, typename Value, typename RealBody, typename Reduction>
00369 Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction,
00370                        const simple_partitioner& partitioner ) {
00371     internal::lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
00372     internal::start_reduce<Range,internal::lambda_reduce_body<Range,Value,RealBody,Reduction>,const simple_partitioner>
00373                           ::run(range, body, partitioner );
00374     return body.result();
00375 }
00376 
00378 
00379 template<typename Range, typename Value, typename RealBody, typename Reduction>
00380 Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction,
00381                        const auto_partitioner& partitioner ) {
00382     internal::lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
00383     internal::start_reduce<Range,internal::lambda_reduce_body<Range,Value,RealBody,Reduction>,const auto_partitioner>
00384                           ::run( range, body, partitioner );
00385     return body.result();
00386 }
00387 
00389 
00390 template<typename Range, typename Value, typename RealBody, typename Reduction>
00391 Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction,
00392                        affinity_partitioner& partitioner ) {
00393     internal::lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
00394     internal::start_reduce<Range,internal::lambda_reduce_body<Range,Value,RealBody,Reduction>,affinity_partitioner>
00395                                         ::run( range, body, partitioner );
00396     return body.result();
00397 }
00398 
00399 #if __TBB_TASK_GROUP_CONTEXT
00401 
00402 template<typename Range, typename Value, typename RealBody, typename Reduction>
00403 Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction,
00404                        const simple_partitioner& partitioner, task_group_context& context ) {
00405     internal::lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
00406     internal::start_reduce<Range,internal::lambda_reduce_body<Range,Value,RealBody,Reduction>,const simple_partitioner>
00407                           ::run( range, body, partitioner, context );
00408     return body.result();
00409 }
00410 
00412 
00413 template<typename Range, typename Value, typename RealBody, typename Reduction>
00414 Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction,
00415                        const auto_partitioner& partitioner, task_group_context& context ) {
00416     internal::lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
00417     internal::start_reduce<Range,internal::lambda_reduce_body<Range,Value,RealBody,Reduction>,const auto_partitioner>
00418                           ::run( range, body, partitioner, context );
00419     return body.result();
00420 }
00421 
00423 
00424 template<typename Range, typename Value, typename RealBody, typename Reduction>
00425 Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction,
00426                        affinity_partitioner& partitioner, task_group_context& context ) {
00427     internal::lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
00428     internal::start_reduce<Range,internal::lambda_reduce_body<Range,Value,RealBody,Reduction>,affinity_partitioner>
00429                                         ::run( range, body, partitioner, context );
00430     return body.result();
00431 }
00432 #endif /* __TBB_TASK_GROUP_CONTEXT */
00433 
00434 #if TBB_PREVIEW_DETERMINISTIC_REDUCE
00436 
00437 template<typename Range, typename Body>
00438 void parallel_deterministic_reduce( const Range& range, Body& body ) {
00439     internal::start_deterministic_reduce<Range,Body>::run( range, body );
00440 }
00441 
00442 #if __TBB_TASK_GROUP_CONTEXT
00444 
00445 template<typename Range, typename Body>
00446 void parallel_deterministic_reduce( const Range& range, Body& body, task_group_context& context ) {
00447     internal::start_deterministic_reduce<Range,Body>::run( range, body, context );
00448 }
00449 #endif /* __TBB_TASK_GROUP_CONTEXT */
00450 
00454 
00455 
00456 template<typename Range, typename Value, typename RealBody, typename Reduction>
00457 Value parallel_deterministic_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction ) {
00458     internal::lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
00459     internal::start_deterministic_reduce<Range,internal::lambda_reduce_body<Range,Value,RealBody,Reduction> >
00460                           ::run(range, body);
00461     return body.result();
00462 }
00463 
00464 #if __TBB_TASK_GROUP_CONTEXT
00466 
00467 template<typename Range, typename Value, typename RealBody, typename Reduction>
00468 Value parallel_deterministic_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction,
00469                        task_group_context& context ) {
00470     internal::lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
00471     internal::start_deterministic_reduce<Range,internal::lambda_reduce_body<Range,Value,RealBody,Reduction> >
00472                           ::run( range, body, context );
00473     return body.result();
00474 }
00475 #endif /* __TBB_TASK_GROUP_CONTEXT */
00476 #endif /* TBB_PREVIEW_DETERMINISTIC_REDUCE */
00477 
00478 
00479 } // namespace tbb
00480 
00481 #endif /* __TBB_parallel_reduce_H */
00482 

Copyright © 2005-2011 Intel Corporation. All Rights Reserved.

Intel, Pentium, Intel Xeon, Itanium, Intel XScale and VTune are registered trademarks or trademarks of Intel Corporation or its subsidiaries in the United States and other countries.

* Other names and brands may be claimed as the property of others.