Publications – Parallel Programming Models Group

2019

M. Danelutto, D. De Sensi, G. Mencagli, and M. Torquati, “Autonomic management experiences in structured parallel programming,” in High performance computing simulation (hpcs), 2019 international conference on, Dublin, Ireland, 2019.
[Abstract] [BibTeX] [Download PDF]

@inproceedings{hpcs2019,
  author    = {Danelutto, Marco and De Sensi, Daniele and Mencagli, Gabriele and Torquati, Massimo},
  title     = {Autonomic Management Experiences in Structured Parallel Programming},
  booktitle = {High Performance Computing \& Simulation (HPCS), 2019 International Conference on},
  year      = {2019},
  month     = jul,
  address   = {Dublin, Ireland},
  toappear  = {},
  abstract  = {Structured parallel programming models based on parallel design patterns are gaining more and more importance. Several state-of-the-art industrial frameworks build on the parallel design pattern concept, including Intel TBB and Microsoft PPL. In these frameworks, the explicit exposition of parallel structure of the application favours the identification of the inefficiencies, the exploitation of techniques increasing the efficiency of the implementation and ensures that most of the more critical aspects related to an efficient exploitation of the available parallelism are moved from application programmers to framework designers.
The very same exposition of the graph representing the parallel activities enables framework designers to emplace efficient autonomic management of non functional concerns, such as performance tuning or power management.
In this paper, we discuss how autonomic management features evolved in different structured parallel programming frameworks based on the algorithmic skeletons and parallel design patterns.
We show that different levels of autonomic management are possible, ranging from simple provisioning of mechanisms suitable to support programmers in the implementation of ad hoc autonomic managers to the complete autonomic managers whose behaviour may be programmed using high level rules by the application programmers. },
  pdf       = {http://pages.di.unipi.it/desensi/assets/pdf/2019_HPCS.pdf},
}

T. De Matteis, G. Mencagli, D. De Sensi, M. Torquati, and M. Danelutto, “Gasser: an auto-tunable system for general sliding-window streaming operators on gpus,” Ieee access, vol. 7, pp. 48753-48769, 2019. doi:10.1109/ACCESS.2019.2910312
[Abstract] [BibTeX]

@article{8688411,
  author     = {De Matteis, Tiziano and Mencagli, Gabriele and De Sensi, Daniele and Torquati, Massimo and Danelutto, Marco},
  title      = {{GASSER}: An Auto-Tunable System for General Sliding-Window Streaming Operators on {GPUs}},
  journal    = {IEEE Access},
  year       = {2019},
  volume     = {7},
  pages      = {48753--48769},
  keywords   = {graphics processing units;optimisation;parallel processing;query processing;popular streaming system;scalable streaming system;completely general queries;incremental queries;GASSER;auto-tunable system;general sliding-window streaming operators;stream processing systems;high-volume data streams;commodity machines;distributed architectures;coprocessors;performance efficiency;data stream;parallelism;configuration parameters;optimal value;graphical processing units;data-parallel tasks;GPU processing;different processing models;streaming paradigm advocates;tuple-at-a-time processing model;sliding-window operators;completely general functions;parallel processing;nonincremental queries;existing GPU-based;auto-tuning approach;Microsoft Windows;Graphics processing units;Parallel processing;Throughput;Windows;Task analysis;Prototypes;Data stream processing;sliding-window queries;GPU processing;autotuning;self-configuring systems},
  doi        = {10.1109/ACCESS.2019.2910312},
  issn       = {2169-3536},
  openaccess = {https://ieeexplore.ieee.org/document/8688411},
  abstract   = {Today's stream processing systems handle high-volume data streams in an efficient manner. To achieve this goal, they are designed to scale out on large clusters of commodity machines. However, despite the efficient use of distributed architectures, they lack support to co-processors like graphical processing units (GPUs) ready to accelerate data-parallel tasks. The main reason for this lack of integration is that GPU processing and the streaming paradigm have different processing models, with GPUs needing a bulk of data present at once while the streaming paradigm advocates a tuple-at-a-time processing model. This paper contributes to fill this gap by proposing Gasser, a system for offloading the execution of sliding-window operators on GPUs. The system focuses on completely general functions by targeting the parallel processing of non-incremental queries that are not supported by the few existing GPU-based streaming prototypes. Furthermore, Gasser provides an auto-tuning approach able to automatically find the optimal value of the configuration parameters (i.e., batch length and the degree of parallelism) needed to optimize throughput and latency with the given query and data stream. The experimental part assesses the performance efficiency of Gasser by comparing its peak throughput and latency against Apache Flink, a popular and scalable streaming system. Furthermore, we evaluate the penalty induced by supporting completely general queries against the performance achieved by the state-of-the-art solution specifically optimized for incremental queries. Finally, we show the speed and accuracy of the auto-tuning approach adopted by Gasser, which is able to self-configure the system by finding the right configuration parameters without manual tuning by the users.},
}

D. De Sensi and M. Danelutto, “Application-aware power capping using nornir,” in Parallel processing and applied mathematics, 2019.
[Abstract] [BibTeX] [Slides]

@inproceedings{ppam2019,
  author    = {De Sensi, Daniele and Danelutto, Marco},
  title     = {Application-Aware Power Capping using {Nornir}},
  booktitle = {Parallel Processing and Applied Mathematics},
  year      = {2019},
  abstract  = {Power consumption of IT infrastructure is a major concern for datacenter operators. Since datacenter power supply is usually dimensioned for an average-case scenario, uncorrelated and simultaneous power spikes in multiple servers could lead to catastrophic effects such as power outages. To avoid such situations, power capping solutions are usually put in place by datacenter operators, to control power consumption of individual server and to avoid the datacenter exceeding safe operational limits. However, most power capping solutions rely on Dynamic Voltage and Frequency Scaling (DVFS), which is not always able to guarantee the power cap specified by the user, especially for low power budget values. In this work we propose a power-capping algorithm that uses a combination of DVFS and Thread Packing. We implement such algorithm in the Nornir framework and we validate it on some real applications by comparing it to the RAPL power capping algorithm and to another state of the art power capping algorithm.},
  slides    = {https://docs.google.com/presentation/d/1pIPrQk3HWW5zhdGM1ywM2o7AGbV_TOHfGkMSwfmqtcM/edit?usp=sharing},
  toappear  = {},
}

D. De Sensi and M. Danelutto, “Transparent autonomicity for openmp applications,” in Euro-par 2019: parallel processing workshops, 2019.
[Abstract] [BibTeX] [Slides]

@inproceedings{autodasp2019,
  author    = {De Sensi, Daniele and Danelutto, Marco},
  title     = {Transparent Autonomicity for {OpenMP} Applications},
  booktitle = {Euro-Par 2019: Parallel Processing Workshops},
  year      = {2019},
  abstract  = {One of the key needs of an autonomic computing system is the ability of monitor the application performance with minimal intrusiveness and performance overhead. Several solutions have been proposed, differing in terms of effort required to the application programmers to add autonomic capabilities to their applications. In this work we extend the Nornir autonomic framework, allowing it to transparently monitor OpenMP applications thanks to the novel OpenMP Tools (OMPT) API. By using this interface, we are able to transparently transfer performance monitoring information from the application to the Nornir framework. This does not require any manual intervention by the programmer, which can seamlessly control an already existing application, enforcing any performance and/or power consumption requirement. We evaluate our approach on some real applications from the PARSEC and NAS benchmarks, showing that our solution introduces a negligible performance overhead, while being able to correctly control applications' performance and power consumption.},
  slides    = {https://docs.google.com/presentation/d/1GyUVj0YvF3tQhKH_zQEsM4_stQcR5srvUxHQsR8xfBk/edit?usp=sharing},
  toappear  = {},
}

D. De Sensi, S. Di Girolamo, and T. Hoefler, “Mitigating network noise on dragonfly networks through application-aware routing,” in Proceedings of the international conference for high performance computing, networking, storage and analysis, New York, NY, USA, 2019, p. 16:1–16:32. doi:10.1145/3295500.3356196
[BibTeX] [URL] [Download PDF] [Slides]

@inproceedings{sc2019,
  author              = {De Sensi, Daniele and Di Girolamo, Salvatore and Hoefler, Torsten},
  title               = {Mitigating Network Noise on Dragonfly Networks Through Application-aware Routing},
  booktitle           = {Proceedings of the International Conference for High Performance Computing, Networking, Storage and Analysis},
  series              = {SC '19},
  publisher           = {ACM},
  address             = {New York, NY, USA},
  location            = {Denver, Colorado},
  year                = {2019},
  pages               = {16:1--16:32},
  articleno           = {16},
  numpages            = {32},
  isbn                = {978-1-4503-6229-0},
  doi                 = {10.1145/3295500.3356196},
  acmid               = {3356196},
  url                 = {http://doi.acm.org/10.1145/3295500.3356196},
  keywords            = {dragonfly, network noise, routing},
  pdf                 = {http://pages.di.unipi.it/desensi/assets/pdf/2019_SC.pdf},
  slides              = {https://docs.google.com/presentation/d/1XhnwR6oCBFMbrIWTHRqUVK6Yf4GGwRjOj1UX2nR-hcA/edit?usp=sharing},
  openaccess          = {https://dl.acm.org/authorize?N690645},
  artifacts_available = {},
}

D. Griebler, A. Vogel, D. De Sensi, M. Danelutto, and L. G. Fernandes, “Simplifying and implementing service level objectives for stream parallelism,” The journal of supercomputing, pp. 1-26, 2019. doi:10.1007/s11227-019-02914-6
[Abstract] [BibTeX] [URL] [Download PDF]

@article{Griebler2019,
  author   = {Griebler, Dalvan and Vogel, Adriano and De Sensi, Daniele and Danelutto, Marco and Fernandes, Luiz G.},
  title    = {Simplifying and implementing service level objectives for stream parallelism},
  journal  = {The Journal of Supercomputing},
  year     = {2019},
  month    = jun,
  day      = {05},
  pages    = {1--26},
  abstract = {An increasing attention has been given to provide service level objectives (SLOs) in stream processing applications due to the performance and energy requirements, and because of the need to impose limits in terms of resource usage while improving the system utilization. Since the current and next-generation computing systems are intrinsically offering parallel architectures, the software has to naturally exploit the architecture parallelism. Implement and meet SLOs on existing applications is not a trivial task for application programmers, since the software development process, besides the parallelism exploitation, requires the implementation of autonomic algorithms or strategies. This is a system-oriented programming approach and requires the management of multiple knobs and sensors (e.g., the number of threads to use, the clock frequency of the cores, etc.) so that the system can self-adapt at runtime. In this work, we introduce a new and simpler way to define SLO in the application's source code, by abstracting from the programmer all the details relative to self-adaptive system implementation. The application programmer specifies which parts of the code to parallelize and the related SLOs that should be enforced. To reach this goal, source-to-source code transformation rules are implemented in our compiler, which automatically generates self-adaptive strategies to enforce, at runtime, the user-expressed objectives. The experiments highlighted promising results with simpler, effective, and efficient SLO implementations for real-world applications.},
  issn     = {1573-0484},
  doi      = {10.1007/s11227-019-02914-6},
  url      = {https://doi.org/10.1007/s11227-019-02914-6},
  pdf      = {http://pages.di.unipi.it/desensi/assets/pdf/2019_JSC.pdf},
}

D. Griebler, D. De Sensi, A. Vogel, M. Danelutto, and L. G. Fernandes, “Service level objectives via c++11 attributes,” in Euro-par 2018: parallel processing workshops, Cham, 2019, pp. 745-756.
[Abstract] [BibTeX] [URL] [Download PDF] [Slides]

@inproceedings{spar:nornir,
  author    = {Griebler, Dalvan and De Sensi, Daniele and Vogel, Adriano and Danelutto, Marco and Fernandes, Luiz Gustavo},
  title     = {Service Level Objectives via {C++11} Attributes},
  booktitle = {Euro-Par 2018: Parallel Processing Workshops},
  year      = {2019},
  publisher = {Springer International Publishing},
  address   = {Cham},
  pages     = {745--756},
  abstract  = {In recent years, increasing attention has been given to the possibility of guaranteeing Service Level Objectives (SLOs) to users about their applications, either regarding performance or power consumption. SLO can be implemented for parallel applications since they can provide many control knobs (e.g., the number of threads to use, the clock frequency of the cores, etc.) to tune the performance and power consumption of the application. Different from most of the existing approaches, we target sequential stream processing applications by proposing a solution based on C++ annotations. The user specifies which parts of the code to parallelize and what type of requirements should be enforced on that part of the code. Our solution first automatically parallelizes the annotated code and then applies self-adaptation approaches at run-time to enforce the user-expressed objectives. We ran experiments on different real-world applications, showing its simplicity and effectiveness.},
  isbn      = {978-3-030-10549-5},
  slides    = {https://docs.google.com/presentation/d/1dv0mdCjP-n0QIDFoEiX61I2mCdLpFE0EFbWdaJZ0iEA/edit?usp=sharing},
  pdf       = {http://pages.di.unipi.it/desensi/assets/pdf/2018_REPARA.pdf},
  url       = {https://link.springer.com/chapter/10.1007%2F978-3-030-10549-5_58},
}

A. Vogel, D. Griebler, D. De Sensi, M. Danelutto, and L. G. Fernandes, “Autonomic and latency-aware degree of parallelism management in spar,” in Euro-par 2018: parallel processing workshops, Cham, 2019, pp. 28-39.
[Abstract] [BibTeX] [URL] [Download PDF]

@inproceedings{spar:latency,
  author    = {Vogel, Adriano and Griebler, Dalvan and De Sensi, Daniele and Danelutto, Marco and Fernandes, Luiz Gustavo},
  title     = {Autonomic and Latency-Aware Degree of Parallelism Management in {SPar}},
  booktitle = {Euro-Par 2018: Parallel Processing Workshops},
  year      = {2019},
  publisher = {Springer International Publishing},
  address   = {Cham},
  pages     = {28--39},
  abstract  = {Stream processing applications became a representative workload in current computing systems. A significant part of these applications demands parallelism to increase performance. However, programmers are often facing a trade-off between coding productivity and performance when introducing parallelism. SPar was created for balancing this trade-off to the application programmers by using the C++11 attributes' annotation mechanism. In SPar and other programming frameworks for stream processing applications, the manual definition of the number of replicas to be used for the stream operators is a challenge. In addition to that, low latency is required by several stream processing applications. We noted that explicit latency requirements are poorly considered on the state-of-the-art parallel programming frameworks. Since there is a direct relationship between the number of replicas and the latency of the application, in this work we propose an autonomic and adaptive strategy to choose the proper number of replicas in SPar to address latency constraints. We experimentally evaluated our implemented strategy and demonstrated its effectiveness on a real-world application, demonstrating that our adaptive strategy can provide higher abstraction levels while automatically managing the latency.},
  isbn      = {978-3-030-10549-5},
  pdf       = {http://pages.di.unipi.it/desensi/assets/pdf/2018_AutoDasp.pdf},
  url       = {https://link.springer.com/chapter/10.1007%2F978-3-030-10549-5_3},
}

2018

A. Conte, D. De Sensi, R. Grossi, A. Marino, and L. Versari, “Discovering k-trusses in large-scale networks,” in 2018 ieee high performance extreme computing conference (hpec), 2018, pp. 1-6. doi:10.1109/HPEC.2018.8547735
[Abstract] [BibTeX] [Download PDF]

@inproceedings{8547735,
  author    = {Conte, Alessio and De Sensi, Daniele and Grossi, Roberto and Marino, Andrea and Versari, Luca},
  title     = {Discovering k-Trusses in Large-Scale Networks},
  booktitle = {2018 IEEE High Performance extreme Computing Conference (HPEC)},
  year      = {2018},
  month     = sep,
  pages     = {1--6},
  abstract  = {A k-truss is a subgraph where every edge belongs to at least k-2 triangles in the subgraph. The truss decomposition assigns each edge the maximum k for which the edge belongs to a k-truss, and the trussness of a graph is the maximum among its edges. Discovery algorithms for k-trusses and truss decomposition provide useful insight for graph analytics (such as community detection). Even though they take polynomial time, on massive networks they suffer from handling a potentially cubic number of wedges: algorithms either need a long time to recompute triangles several times, have high memory usage, or rely on the large number of cores on graphic units. In this paper we describe EXTRUS, a highly optimized algorithm for truss decomposition which outperforms existing algorithms. We then introduce a faster algorithm, HYBTRUS, which finds the trussness of a graph using less time and space than EXTRUSS. Our algorithms take the best of existing approaches having good performance, low memory usage, and no need for sophisticated hardware systems.
We compare our algorithms with the state-of-the-art on a set of real-world and synthetic networks. EXTRUSS processes graphs with over a billion edges, which seems difficult for the competitors, and our HYBTRUSS is the first algorithm able to find the trussness of a graph with over 25 billion edges.},
  keywords  = {graph theory;network theory (graphs);trussness;large-scale networks;subgraph;truss decomposition assigns;discovery algorithms;graph analytics;polynomial time;massive networks;high memory usage;EXTRUSS;k-trusses;HYBTRUSS;Approximation algorithms;Data structures;Image edge detection;Hardware;Indexes;Switches;Informatics;k-trusses;truss decomposition;graph algorithms;HPEC 2018 graph challenge},
  doi       = {10.1109/HPEC.2018.8547735},
  issn      = {2377-6943},
  award     = {IEEE HPEC Graph Challenge Finalist},
  pdf       = {http://pages.di.unipi.it/desensi/assets/pdf/2018_HPEC.pdf},
}

A. Conte, T. De Matteis, D. De Sensi, R. Grossi, A. Marino, and L. Versari, “D2k: scalable community detection in massive networks via small-diameter k-plexes,” in Proceedings of the 24th acm sigkdd international conference on knowledge discovery & data mining, New York, NY, USA, 2018, pp. 1272-1281. doi:10.1145/3219819.3220093
[Abstract] [BibTeX] [URL] [Download PDF]

@inproceedings{kdd:18,
  author     = {Conte, Alessio and De Matteis, Tiziano and De Sensi, Daniele and Grossi, Roberto and Marino, Andrea and Versari, Luca},
  title      = {{D2K}: Scalable Community Detection in Massive Networks via Small-Diameter k-Plexes},
  booktitle  = {Proceedings of the 24th ACM SIGKDD International Conference on Knowledge Discovery \& Data Mining},
  series     = {KDD '18},
  year       = {2018},
  isbn       = {978-1-4503-5552-0},
  location   = {London, United Kingdom},
  pages      = {1272--1281},
  numpages   = {10},
  url        = {http://doi.acm.org/10.1145/3219819.3220093},
  doi        = {10.1145/3219819.3220093},
  acmid      = {3220093},
  publisher  = {ACM},
  address    = {New York, NY, USA},
  keywords   = {community discovery, graph enumeration, k-plexes, parallel programming},
  openaccess = {https://dl.acm.org/authorize?N666390},
  abstract   = {This paper studies kplexes, a well known pseudo-clique model for network communities. In a kplex, each node can miss at most $k-1$ links. Our goal is to detect large communities in today's real-world graphs which can have hundreds of millions of edges. While many have tried, this task has been elusive so far due to its computationally challenging nature: kplexes and other pseudo-cliques are harder to find and more numerous than cliques, a well known hard problem. We present D2K, which is the first algorithm able to find large kplexes of very large graphs in just a few minutes. The good performance of our algorithm follows from a combination of graph-theoretical concepts, careful algorithm engineering and a high-performance implementation. In particular, we exploit the low degeneracy of real-world graphs, and the fact that large enough kplexes have diameter~2. We validate a sequential and a parallel/distributed implementation of D2K on real graphs with up to half a billion edges.},
  videopitch = {https://www.youtube.com/watch?v=zF2Hz1wq9eM},
  pdf        = {http://pages.di.unipi.it/desensi/assets/pdf/2018_KDD.pdf},
}

M. Danelutto and M. Torquati, “Increasing efficiency in parallel programming teaching,” in Proceedings of the 26th euromicro international conference on parallel, distributed, and network-based processing, 2018, pp. 306-310.
[Abstract] [BibTeX]

@inproceedings{teaching-pdp,
  author    = {Danelutto, Marco and Torquati, Massimo},
  title     = {Increasing efficiency in parallel programming teaching},
  booktitle = {Proceedings of the 26th Euromicro International Conference on Parallel, Distributed, and Network-Based Processing},
  publisher = {{IEEE Press}},
  year      = 2018,
  pages     = {306--310},
  doi       = {10.1109/PDP2018.2018.00053},
  abstract  = {The ability to teach parallel programming principles
and techniques is becoming fundamental to prepare a new generation
of programmers able to master the pervasive parallelism
made available by hardware vendors. Classical parallel programming
courses leverage either low-level programming frameworks
(e.g. those based on Pthreads) or higher level frameworks
such as OpenMP or MPI. We discuss our teaching experience
within the Master in Computer Science and networking where
parallel programming is taught leveraging structured parallel
programming principles and frameworks. The paper summarizes
the results achieved in eight years of experience and shows how
the adoption of a structured parallel programming approach
improves the efficiency of the teaching process.},
}

D. De Sensi, T. De Matteis, and M. Danelutto, “Simplifying self-adaptive and power-aware computing with nornir,” Future generation computer systems, 2018. doi:10.1016/j.future.2018.05.012
[Abstract] [BibTeX] [URL] [Download PDF]

@article{nornir:fgcs18,
  author   = {De Sensi, Daniele and De Matteis, Tiziano and Danelutto, Marco},
  title    = {Simplifying self-adaptive and power-aware computing with {Nornir}},
  journal  = {Future Generation Computer Systems},
  year     = {2018},
  issn     = {0167-739X},
  doi      = {10.1016/j.future.2018.05.012},
  url      = {https://www.sciencedirect.com/science/article/pii/S0167739X17326699},
  pdf      = {http://pages.di.unipi.it/desensi/assets/pdf/2018_FGCS.pdf},
  keywords = {Self-adaptive, Power-aware, Quality of service, Data stream processing, Fog computing, Parallel computing},
  abstract = {Self-adaptation is an emerging requirement in parallel computing. It enables the dynamic selection of resources to allocate to the application in order to meet performance and power consumption requirements. This is particularly relevant in Fog Applications, where data is generated by a number of devices at a varying rate, according to users’ activity. By dynamically selecting the appropriate number of resources it is possible, for example, to use at each time step the minimum amount of resources needed to process the incoming data. Implementing such kind of algorithms may be a complex task, due to low-level interactions with the underlying hardware and to non-intrusive and low-overhead monitoring of the applications. For these reasons, in this paper we propose Nornir, a C++-based framework, which can be used to enforce performance and power consumption constraints on parallel applications running on shared memory multicores. The framework can be easily customized by algorithm designers to implement new self-adaptive policies. By instrumenting the applications in the \{PARSEC\} benchmark, we provide to strategy designers a wide set of applications already interfaced to Nornir. In addition to this, to prove its flexibility, we implemented and compared several state-of-the-art existing policies, showing that Nornir can also be used to easily analyze different algorithms and to provide useful insights on them.},
}

D. De Sensi, T. De Matteis, and M. Danelutto, “Nornir: a customizable framework for autonomic and power-aware applications,” in Euro-par 2017: parallel processing workshops, 2018, pp. 42-54. doi:10.1007/978-3-319-75178-8_4
[Abstract] [BibTeX] [URL] [Download PDF] [Slides]

@inproceedings{nornir:autodasp17,
  author    = {De Sensi, Daniele and De Matteis, Tiziano and Danelutto, Marco},
  editor    = {Heras, Dora B. and Bouge, Luc},
  title     = {Nornir: A Customizable Framework for Autonomic and Power-Aware Applications},
  booktitle = {Euro-Par 2017: Parallel Processing Workshops},
  year      = {2018},
  publisher = {Springer International Publishing},
  pages     = {42--54},
  abstract  = {A desirable characteristic of modern parallel applications is the ability to dynamically select the amount of resources to be used to meet requirements on performance or power consumption. In many cases, providing explicit guarantees on performance is of paramount importance. In streaming applications, this is related with the concept of elasticity, i.e. being able to allocate the proper amount of resources to match the current demand as closely as possible. Similarly, in other scenarios, it may be useful to limit the maximum power consumption of an application to do not exceed the power budget. In this paper we propose Nornir, a customizable C++ framework for autonomic and power-aware parallel applications on shared memory multicore machines. Nornir can be used by autonomic strategy designers to implement new algorithms and by application users to enforce requirements on applications.},
  isbn      = {978-3-319-75178-8},
  doi       = {10.1007/978-3-319-75178-8_4},
  slides    = {https://docs.google.com/presentation/d/1PJ9gn_jIdApjrK1-wB3gnAB2PPYOsocxrqMTB96HI2E/edit?usp=sharing},
  pdf       = {http://pages.di.unipi.it/desensi/assets/pdf/2017_AutoDasp.pdf},
  url       = {https://link.springer.com/chapter/10.1007/978-3-319-75178-8_4},
}

L. Gazzarri and M. Danelutto, “A Tool to Support FastFlow Program Design,” in Parallel Computing is Everywhere, 2018, pp. 687-697.
[Abstract] [BibTeX] [URL]

@inproceedings{rplshell-parco17,
  author    = {Gazzarri, Leonardo and Danelutto, Marco},
  title     = {A Tool to Support {FastFlow} Program Design},
  booktitle = {{Parallel Computing is Everywhere}},
  editor    = {Bassini and others},
  series    = {{Advances in Parallel Computing}},
  volume    = 32,
  publisher = {IOS Press},
  year      = 2018,
  pages     = {687--697},
  doi       = {10.3233/978-1-61499-843-3-687},
  url       = {http://ebooks.iospress.nl/volumearticle/48667},
  abstract  = {We describe the implementation of ff-RPL, a shell to support structured parallel programming development in FastFlow. The shell provides ways to explore the space of functionally equivalent, alternative parallel implementations of the same application with different non functional properties. The tool is entirely written in C++ and has been designed in such a way it can be easily extended to take into account new non functional features, refactoring and optimization rules, as well as different parallel patterns. Preliminary experimental results are shown relatively to the code generation part.}
}

D. Griebler, R. B. Hoffmann, M. Danelutto, and L. G. Fernandes, “Higher-Level Parallelism Abstractions for Video Applications with SPar,” in Parallel Computing is Everywhere, 2018, pp. 698-707.
[Abstract] [BibTeX] [URL]

@inproceedings{dalvan-parco17,
  author    = {Griebler, Dalvan and Hoffmann, Renato B. and Danelutto, Marco and Fernandes, Luiz G.},
  title     = {Higher-Level Parallelism Abstractions for Video Applications with {SPar}},
  booktitle = {{Parallel Computing is Everywhere}},
  editor    = {Bassini and others},
  series    = {{Advances in Parallel Computing}},
  volume    = 32,
  publisher = {IOS Press},
  year      = 2018,
  pages     = {698--707},
  doi       = {10.3233/978-1-61499-843-3-698},
  url       = {http://ebooks.iospress.nl/volumearticle/48668},
  abstract  = {SPar is a Domain-Specific Language (DSL) designed to provide high-level parallel programming abstractions for streaming applications. Video processing application domain requires parallel processing to extract and analyze information quickly. When using state-of-the-art frameworks such as FastFlow and TBB, the application programmer has to manage source code re-factoring and performance optimization to implement parallelism efficiently. Our goal is to make this process easier for programmers through SPar. Thus we assess SPar's programming language and its performance in traditional video applications. We also discuss different implementations compared to the ones of SPar. Results demonstrate that SPar maintains the sequential code structure, is less code intrusive, and provides higher-level programming abstractions without introducing notable performance losses. Therefore, it represents a good choice for application programmers from the video processing domain. }
}

M. Torquati, D. De Sensi, G. Mencagli, M. Aldinucci, and M. Danelutto, “Power-aware pipelining with automatic concurrency control,” Concurrency and computation: practice and experience, p. e4652, 2018. doi:10.1002/cpe.4652
[Abstract] [BibTeX] [URL] [Download PDF]

@article{ccpe2018,
  author   = {Torquati, Massimo and De Sensi, Daniele and Mencagli, Gabriele and Aldinucci, Marco and Danelutto, Marco},
  title    = {Power-aware pipelining with automatic concurrency control},
  journal  = {Concurrency and Computation: Practice and Experience},
  year     = {2018},
  pages    = {e4652},
  keywords = {blocking, concurrency control, data pipelining, data streams, multicores, power saving},
  doi      = {10.1002/cpe.4652},
  url      = {https://onlinelibrary.wiley.com/doi/abs/10.1002/cpe.4652},
  pdf      = {http://pages.di.unipi.it/desensi/assets/pdf/2018_CCPE.pdf},
  eprint   = {https://onlinelibrary.wiley.com/doi/pdf/10.1002/cpe.4652},
  abstract = {Continuous streaming computations are usually composed of different modules, exchanging data through shared message queues. The selection of the algorithm used to access such queues (i.e. the "concurrency control") is a critical aspect both for performance and power consumption. In this paper we describe the design of automatic concurrency control algorithm for implementing power-efficient communications on shared-memory multicores. The algorithm automatically switches between "nonblocking" and "blocking" concurrency protocols, getting the best from the two worlds, i.e. obtaining the same throughput offered by the "nonblocking" implementation and the same power efficiency of the "blocking" concurrency protocol. We demonstrate the effectiveness of our approach using two micro-benchmarks and two real streaming applications.}
}

M. Torquati, T. Menga, T. De Matteis, D. De Sensi, and G. Mencagli, “Reducing message latency and cpu utilization in the caf actor framework,” in 2018 26th euromicro international conference on parallel, distributed and network-based processing (pdp), 2018, pp. 145-153. doi:10.1109/PDP2018.2018.00028
[Abstract] [BibTeX] [URL] [Download PDF]

@inproceedings{cafpdp18,
  author    = {Torquati, Massimo and Menga, Tullio and De Matteis, Tiziano and De Sensi, Daniele and Mencagli, Gabriele},
  title     = {Reducing Message Latency and {CPU} Utilization in the {CAF} Actor Framework},
  booktitle = {2018 26th Euromicro International Conference on Parallel, Distributed and Network-based Processing (PDP)},
  year      = {2018},
  month     = mar,
  pages     = {145--153},
  keywords  = {C++ languages;Computational modeling;Message systems;Power demand;Programming;Runtime;Throughput;Actor model;CAF;message latency;multi-cores;polling strategies;work-stealing},
  doi       = {10.1109/PDP2018.2018.00028},
  url       = {https://ieeexplore.ieee.org/abstract/document/8374451/},
  pdf       = {http://pages.di.unipi.it/desensi/assets/pdf/2018_PDP.pdf},
  abstract  = {In this work, we consider the C++ Actor Framework (CAF), a recent proposal that revamped the interest in building concurrent and distributed applications using the actor programming model in C++. CAF has been optimized for high-throughput computing, whereas message latency between actors is greatly influenced by the message data rate: at low and moderate rates the latency is higher than at high data rates. To this end, we propose a modification of the polling strategies in the work-stealing CAF scheduler, which can reduce message latency at low and moderate data rates up to two orders of magnitude without compromising the overall throughput and message latency at maximum pressure. The technique proposed uses a lightweight event notification protocol that is general enough to be used to optimize the runtime of other frameworks experiencing similar issues.},
}

2017

M. Danelutto, T. De Matteis, D. De Sensi, G. Mencagli, M. Torquati, M. Aldinucci, and P. Kilpatrick, “The rephrase extended pattern set for data intensive parallel computing,” International journal of parallel programming, p. 74–93, 2017. doi:10.1007/s10766-017-0540-z
[Abstract] [BibTeX] [URL]

@article{rephrase:ijpp17,
  author     = {Danelutto, Marco and De Matteis, Tiziano and De Sensi, Daniele and Mencagli, Gabriele and Torquati, Massimo and Aldinucci, Marco and Kilpatrick, Peter},
  title      = {The {RePhrase} Extended Pattern Set for Data Intensive Parallel Computing},
  journal    = {International Journal of Parallel Programming},
  year       = {2017},
  month      = nov,
  day        = {28},
  pages      = {74--93},
  issn       = {1573-7640},
  doi        = {10.1007/s10766-017-0540-z},
  url        = {https://doi.org/10.1007/s10766-017-0540-z},
  openaccess = {http://rdcu.be/zN6c},
  abstract   = {We discuss the extended parallel pattern set identified within the EU-funded project RePhrase as a candidate pattern set to support data intensive applications targeting heterogeneous architectures. The set has been designed to include three classes of pattern, namely (1) core patterns, modelling common, not necessarily data intensive parallelism exploitation patterns, usually to be used in composition; (2) high level patterns, modelling common, complex and complete parallelism exploitation patterns; and (3) building block patterns, modelling the single components of data intensive applications, suitable for use---in composition---to implement patterns not covered by the core and high level patterns. We discuss the expressive power of the RePhrase extended pattern set and results illustrating the performances that may be achieved with the FastFlow implementation of the high level patterns.},
}

M. Danelutto, D. De Sensi, and M. Torquati, “A power-aware, self-adaptive macro data flow framework,” Parallel processing letters, vol. 27, iss. 01, p. 1740004, 2017. doi:10.1142/S0129626417400047
[Abstract] [BibTeX] [URL] [Download PDF] [Slides]

@article{nornir:ppl17,
  author   = {Danelutto, Marco and De Sensi, Daniele and Torquati, Massimo},
  title    = {A Power-Aware, Self-Adaptive Macro Data Flow Framework},
  journal  = {Parallel Processing Letters},
  volume   = {27},
  number   = {1},
  pages    = {1740004},
  year     = {2017},
  month    = mar,
  doi      = {10.1142/S0129626417400047},
  url      = {http://www.worldscientific.com/doi/abs/10.1142/S0129626417400047},
  eprint   = {http://www.worldscientific.com/doi/pdf/10.1142/S0129626417400047},
  pdf      = {http://pages.di.unipi.it/desensi/assets/pdf/2017_PPL.pdf},
  slides   = {https://docs.google.com/presentation/d/1CWjcuhewYe6ATDgY87S1Zu1sduVLZE6skIjy4y1wpwc/edit?usp=sharing},
  abstract = {The dataflow programming model has been extensively used as an effective solution to implement efficient parallel programming frameworks. However, the amount of resources allocated to the runtime support is usually fixed once by the programmer or the runtime, and kept static during the entire execution. While there are cases where such a static choice may be appropriate, other scenarios may require to dynamically change the parallelism degree during the application execution. In this paper we propose an algorithm for multicore shared memory platforms, that dynamically selects the optimal number of cores to be used as well as their clock frequency according to either the workload pressure or to explicit user requirements. We implement the algorithm for both structured and unstructured parallel applications and we validate our proposal over three real applications, showing that it is able to save a significant amount of power, while not impairing the performance and not requiring additional effort from the application programmer.},
}

M. Danelutto, T. De Matteis, D. De Sensi, and M. Torquati, “Evaluating concurrency throttling and thread packing on smt multicores,” in Proceedings of the 25th euromicro international conference on parallel, distributed, and network-based processing, PDP 2017, 2017, pp. 219-223. doi:10.1109/PDP.2017.39
[Abstract] [BibTeX] [URL] [Download PDF] [Slides]

@inproceedings{cttp:pdp17,
  author    = {Danelutto, Marco and De Matteis, Tiziano and De Sensi, Daniele and Torquati, Massimo},
  title     = {Evaluating Concurrency Throttling and Thread Packing on {SMT} Multicores},
  booktitle = {Proceedings of the 25th Euromicro International Conference on Parallel, Distributed, and Network-Based Processing, {PDP} 2017},
  location  = {St. Petersburg, Russia},
  year      = {2017},
  pages     = {219--223},
  doi       = {10.1109/PDP.2017.39},
  url       = {http://ieeexplore.ieee.org/document/7912648/},
  pdf       = {http://pages.di.unipi.it/desensi/assets/pdf/2017_PDP.pdf},
  slides    = {https://docs.google.com/presentation/d/1qdiJIPpQ19rzgifHwoHYlwfQ0RurBz8xdvSz5zG0xco/edit?usp=sharing},
  abstract  = {Power-aware computing is gaining an increasing attention both in academic and industrial settings. The problem of guaranteeing a given QoS requirement (either in terms of performance or power consumption) can be faced by selecting and dynamically adapting the amount of physical and logical resources used by the application. In this study, we considered standard multicore platforms by taking as a reference approaches for power-aware computing two well-known dynamic reconfiguration techniques: Concurrency Throttling and Thread Packing. Furthermore, we also studied the impact of using simultaneous multithreading (e.g., Intel's HyperThreading) in both techniques. In this work, leveraging on the applications of the PARSEC benchmark suite, we evaluate these techniques by considering performance-power trade-offs, resource efficiency, predictability and required programming effort. The results show that, according to the comparison criteria, these techniques complement each other.},
}

M. Danelutto, T. De Matteis, D. De Sensi, G. Mencagli, and M. Torquati, “P$^{3}$arsec: towards parallel patterns benchmarking,” in Proceedings of the 32nd annual acm symposium on applied computing, New York, NY, USA, 2017, pp. 1582-1589. doi:10.1145/3019612.3019745
[Abstract] [BibTeX] [URL] [Download PDF] [Slides]

@inproceedings{p3arsec:sac17,
  author     = {Danelutto, Marco and De Matteis, Tiziano and De Sensi, Daniele and Mencagli, Gabriele and Torquati, Massimo},
  title      = {{P$^{3}$ARSEC}: Towards Parallel Patterns Benchmarking},
  booktitle  = {Proceedings of the 32nd Annual ACM Symposium on Applied Computing},
  series     = {SAC '17},
  year       = {2017},
  location   = {Marrakesh, Morocco},
  pages      = {1582--1589},
  numpages   = {8},
  isbn       = {978-1-4503-4486-9},
  doi        = {10.1145/3019612.3019745},
  acmid      = {3019745},
  publisher  = {ACM},
  address    = {New York, NY, USA},
  keywords   = {Parallel Patterns, PARSEC Benchmarks, Intel KNL},
  url        = {http://doi.acm.org/10.1145/3019612.3019745},
  openaccess = {http://dl.acm.org/authorize?N34889},
  pdf        = {http://pages.di.unipi.it/desensi/assets/pdf/2017_SAC.pdf},
  slides     = {https://docs.google.com/presentation/d/1tbGK13EGookcV1HvVbup2Rx1HlH65t4tsbhIuaoS3tA/edit#slide=id.g1b7a7fa945_0_14},
  abstract   = {High-level parallel programming is a de-facto standard approach to develop parallel software with reduced time to development. High-level abstractions are provided by existing frameworks as pragma-based annotations in the source code, or through pre-built parallel patterns that recur frequently in parallel algorithms, and that can be easily instantiated by the programmer to add a structure to the development of parallel software. In this paper we focus on this second approach and we propose P3ARSEC, a benchmark suite for parallel pattern-based frameworks consisting of a representative subset of PARSEC applications. We analyse the programmability advantages and the potential performance penalty of using such high-level methodology with respect to hand-made parallelisations using low-level mechanisms. The results are obtained on the new Intel Knights Landing multicore, and show a significantly reduced code complexity with comparable performance.},
}

T. De Matteis and G. Mencagli, “Proactive elasticity and energy awareness in data stream processing,” Journal of systems and software, vol. 127, pp. 302-319, 2017. doi:10.1016/j.jss.2016.08.037
[Abstract] [BibTeX] [URL]

@article{dasp:jss17,
  author   = {De Matteis, Tiziano and Mencagli, Gabriele},
  title    = {Proactive elasticity and energy awareness in data stream processing},
  journal  = {Journal of Systems and Software},
  volume   = {127},
  pages    = {302--319},
  year     = {2017},
  issn     = {0164-1212},
  doi      = {10.1016/j.jss.2016.08.037},
  url      = {http://www.sciencedirect.com/science/article/pii/S0164121216301467},
  keywords = {Data stream processing, Elasticity, Model predictive control, Frequency scaling},
  abstract = {Data stream processing applications have a long running nature (24 hr/7 d) with workload conditions that may exhibit wide variations at run-time. Elasticity is the term coined to describe the capability of applications to change dynamically their resource usage in response to workload fluctuations. This paper focuses on strategies for elastic data stream processing targeting multicore systems. The key idea is to exploit Model Predictive Control, a control-theoretic method that takes into account the system behavior over a future time horizon in order to decide the best reconfiguration to execute. We design a set of energy-aware proactive strategies, optimized for throughput and latency QoS requirements, which regulate the number of used cores and the {CPU} frequency through the Dynamic Voltage and Frequency Scaling (DVFS) support offered by modern multicore CPUs. We evaluate our strategies in a high-frequency trading application fed by synthetic and real-world workload traces. We introduce specific properties to effectively compare different elastic approaches, and the results show that our strategies are able to achieve the best outcome.},
}

T. De Matteis and G. Mencagli, “Elastic scaling for distributed latency-sensitive data stream operators,” in Proceedings of the 25th euromicro international conference on parallel, distributed, and network-based processing, PDP 2017, 2017.
[Abstract] [BibTeX] [Slides]

@inproceedings{dasp:pdp17,
  author    = {De Matteis, Tiziano and Mencagli, Gabriele},
  title     = {Elastic Scaling for Distributed Latency-sensitive Data Stream Operators},
  booktitle = {Proceedings of the 25th Euromicro International Conference on Parallel, Distributed, and Network-Based Processing, {PDP} 2017},
  location  = {St. Petersburg, Russia},
  year      = {2017},
  slides    = {https://docs.google.com/presentation/d/1QwB0-7STgB6BF9q_GPJBf1lYjuiQ9FCPmvg-xWIJAGI/edit?usp=sharing},
  abstract  = {High-volume data streams are straining the limits of stream processing frameworks which need advanced parallel processing capabilities to withstand the actual incoming bandwidth. Parallel processing must be synergically integrated with elastic features in order to dynamically scale the amount of utilized resources by accomplishing the Quality of Service goals in a cost-effective manner. This paper proposes a control-theoretic strategy to drive the elastic behavior of latency-sensitive streaming operators in distributed environments. The strategy takes scaling decisions in advance by relying on a predictive model-based approach. Our ideas have been experimentally evaluated on a cluster using a real-world streaming application fed by synthetic and real datasets. The results show that our approach takes the strictly necessary reconfigurations while providing reduced resource consumption. Furthermore, it allows the operator to meet desired average latency requirements with a significant reduction in the experienced latency jitter.},
}

T. De Matteis and G. Mencagli, “Parallel patterns for window-based stateful operators on data streams: an algorithmic skeleton approach,” International journal of parallel programming, vol. 45, iss. 2, pp. 382-401, 2017. doi:10.1007/s10766-016-0413-x
[Abstract] [BibTeX] [URL] [Slides]

@article{DeMatteis2017,
  author   = {De Matteis, Tiziano and Mencagli, Gabriele},
  title    = {Parallel Patterns for Window-Based Stateful Operators on Data Streams: An Algorithmic Skeleton Approach},
  journal  = {International Journal of Parallel Programming},
  year     = {2017},
  month    = apr,
  day      = {01},
  volume   = {45},
  number   = {2},
  pages    = {382--401},
  issn     = {1573-7640},
  doi      = {10.1007/s10766-016-0413-x},
  url      = {https://doi.org/10.1007/s10766-016-0413-x},
  slides   = {https://docs.google.com/presentation/d/1yhsSff97f434wR-VA1szlqKxx52YMYKkdw1GVkBDyF8/edit?usp=sharing},
  abstract = {The topic of Data Stream Processing is a recent and highly active research area dealing with the in-memory, tuple-by-tuple analysis of streaming data. Continuous queries typically consume huge volumes of data received at a great velocity. Solutions that persistently store all the input tuples and then perform off-line computation are impractical. Rather, queries must be executed continuously as data cross the streams. The goal of this paper is to present parallel patterns for window-based stateful operators, which are the most representative class of stateful data stream operators. Parallel patterns are presented ``{\`a} la'' Algorithmic Skeleton, by explaining the rationale of each pattern, the preconditions to safely apply it, and the outcome in terms of throughput, latency and memory consumption. The patterns have been implemented in the FastFlow framework targeting off-the-shelf multicores. To the best of our knowledge this is the first time that a similar effort to merge the Data Stream Processing domain and the field of Structured Parallelism has been made.},
}

D. De Sensi, M. Danelutto, and M. Torquati, “Nornir: a power-aware runtime support for parallel applications,” in Supercomputing doctoral showcase, Denver, Colorado, US, 2017.
[BibTeX] [Slides]

@inproceedings{scphd:17,
  author    = {De Sensi, Daniele and Danelutto, Marco and Torquati, Massimo},
  title     = {Nornir: A Power-Aware Runtime Support for Parallel Applications},
  booktitle = {Supercomputing Doctoral Showcase},
  address   = {Denver, Colorado, US},
  year      = {2017},
  month     = nov,
  poster    = {http://pages.di.unipi.it/desensi/assets/img/2017_Supercomputing.png},
  slides    = {https://docs.google.com/presentation/d/1O1jKbjiW8jOUTo2gl38b0UFLWrJZKF4C4HjwXNdykN8/edit?usp=sharing},
}

D. De Sensi, P. Kilpatrick, and M. Torquati, “State-aware concurrency throttling,” in Proceedings of international parallel computing conference (ParCo), Bologna, Italy, 2017, pp. 201-210. doi:10.3233/978-1-61499-843-3-201
[Abstract] [BibTeX] [URL] [Download PDF] [Slides]

@inproceedings{stateawarethrottling,
  author    = {De Sensi, Daniele and Kilpatrick, Peter and Torquati, Massimo},
  title     = {State-Aware Concurrency Throttling},
  booktitle = {Proceedings of International Parallel Computing Conference ({ParCo})},
  address   = {Bologna, Italy},
  year      = {2017},
  pages     = {201--210},
  keywords  = {Power-Aware Computing, Concurrency Throttling, Data Stream Processing},
  doi       = {10.3233/978-1-61499-843-3-201},
  url       = {http://ebooks.iospress.nl/volumearticle/48609},
  pdf       = {http://pages.di.unipi.it/desensi/assets/pdf/2017_ParCo.pdf},
  slides    = {https://docs.google.com/presentation/d/19TktfDoOvqhXUaB1iX7uL8-uF9ce_6X73zGd07TJFr4/edit?usp=sharing},
  abstract  = {Reconfiguration of parallel applications has gained traction with the increasing emphasis on energy/performance trade-off. The ability to dynamically change the amount of resources used by an application allows reaction to changes in the environment, in the application behavior or in the user's requirements. A popular technique consists in changing the number of threads used by the application (Dynamic Concurrency Throttling). Although this provides good control of application performance and power consumption, managing the technique can impose a significant burden on the application programmer, mainly due to state management and redistribution following the addition or removal of a thread. Nevertheless, some common state access patterns have been identified in some popular applications. By leveraging on this knowledge, we will describe how it is possible to simplify the state management procedures following a Concurrency Throttling operation.},
}

D. De Sensi, M. Torquati, and M. Danelutto, “Mammut: high-level management of system knobs and sensors,” Softwarex, vol. 6, pp. 150-154, 2017. doi:10.1016/j.softx.2017.06.005
[Abstract] [BibTeX] [URL]

@article{mammut:softwarex,
  author     = {De Sensi, Daniele and Torquati, Massimo and Danelutto, Marco},
  title      = {Mammut: High-level management of system knobs and sensors},
  journal    = {SoftwareX},
  volume     = {6},
  pages      = {150--154},
  year       = {2017},
  month      = jul,
  issn       = {2352-7110},
  doi        = {10.1016/j.softx.2017.06.005},
  url        = {http://www.sciencedirect.com/science/article/pii/S2352711017300225},
  openaccess = {http://www.sciencedirect.com/science/article/pii/S2352711017300225},
  keywords   = {Energy profiling, DVFS, System management, Remote management},
  abstract   = {Managing low-level architectural features for controlling performance and power consumption is a growing demand in the parallel computing community. Such features include, but are not limited to: energy profiling, platform topology analysis, CPU cores disabling and frequency scaling. However, these low-level mechanisms are usually managed by specific tools, without any interaction between each other, thus hampering their usability. More important, most existing tools can only be used through a command line interface and they do not provide any API. Moreover, in most cases, they only allow monitoring and managing the same machine on which the tools are used. Mammut provides and integrates architectural management utilities through a high-level and easy-to-use object-oriented interface. By using Mammut, it is possible to link together different collected information and to exploit them on both local and remote systems, to build architecture-aware applications.},
}

D. De Sensi, T. De Matteis, M. Torquati, G. Mencagli, and M. Danelutto, “Bringing parallel patterns out of the corner: the p$^{3}$arsec benchmark suite,” Acm trans. archit. code optim., vol. 14, iss. 4, p. 33:1–33:26, 2017. doi:10.1145/3132710
[Abstract] [BibTeX] [URL] [Download PDF]

@article{p3arsec:taco17,
  author     = {De Sensi, Daniele and De Matteis, Tiziano and Torquati, Massimo and Mencagli, Gabriele and Danelutto, Marco},
  title      = {Bringing Parallel Patterns Out of the Corner: The {P$^{3}$ARSEC} Benchmark Suite},
  journal    = {ACM Trans. Archit. Code Optim.},
  issue_date = {October 2017},
  volume     = {14},
  number     = {4},
  month      = oct,
  year       = {2017},
  issn       = {1544-3566},
  pages      = {33:1--33:26},
  articleno  = {33},
  numpages   = {26},
  doi        = {10.1145/3132710},
  acmid      = {3132710},
  url        = {http://doi.acm.org/10.1145/3132710},
  openaccess = {http://dl.acm.org/authorize?N49996},
  pdf        = {http://pages.di.unipi.it/desensi/assets/pdf/2017_TACO.pdf},
  poster     = {http://pages.di.unipi.it/desensi/assets/img/2017_TACO.png},
  publisher  = {ACM},
  address    = {New York, NY, USA},
  keywords   = {Parallel patterns, algorithmic skeletons, benchmarking, multicore programming, parsec},
  abstract   = {High-level parallel programming is an active research topic aimed at promoting parallel programming methodologies that provide the programmer with high-level abstractions to develop complex parallel software with reduced time to solution. Pattern-based parallel programming is based on a set of composable and customizable parallel patterns used as basic building blocks in parallel applications. In recent years, a considerable effort has been made in empowering this programming model with features able to overcome shortcomings of early approaches concerning flexibility and performance. In this article, we demonstrate that the approach is flexible and efficient enough by applying it on 12 out of 13 PARSEC applications. Our analysis, conducted on three different multicore architectures, demonstrates that pattern-based parallel programming has reached a good level of maturity, providing comparable results in terms of performance with respect to both other parallel programming methodologies based on pragma-based annotations (i.e., OpenMP and OmpSs) and native implementations (i.e., Pthreads). Regarding the programming effort, we also demonstrate a considerable reduction in lines of code and code churn compared to Pthreads and comparable results with respect to other existing implementations.},
}

D. Griebler, M. Danelutto, M. Torquati, and L. G. Fernandes, “Spar: A DSL for high-level and productive stream parallelism,” Parallel processing letters, vol. 27, iss. 1, pp. 1-20, 2017. doi:10.1142/S0129626417400059
[BibTeX] [URL]

@article{spar-ppl-dalvan,
  author    = {Griebler, Dalvan and Danelutto, Marco and Torquati, Massimo and Fernandes, Luiz Gustavo},
  title     = {{SPar}: {A} {DSL} for High-Level and Productive Stream Parallelism},
  journal   = {Parallel Processing Letters},
  volume    = {27},
  number    = {1},
  pages     = {1--20},
  year      = {2017},
  doi       = {10.1142/S0129626417400059},
  url       = {http://dx.doi.org/10.1142/S0129626417400059},
  timestamp = {Fri, 17 Mar 2017 16:44:56 +0100},
  biburl    = {http://dblp2.uni-trier.de/rec/bib/journals/ppl/GrieblerDTF17},
  bibsource = {dblp computer science bibliography, http://dblp.org},
}

G. Mencagli, M. Torquati, M. Danelutto, and T. De Matteis, “Parallel continuous preference queries over out-of-order and bursty data streams,” Ieee transactions on parallel and distributed systems, vol. 28, iss. 9, pp. 2608-2624, 2017. doi:10.1109/TPDS.2017.2679197
[Abstract] [BibTeX]

@article{tpds17,
  author   = {Mencagli, Gabriele and Torquati, Massimo and Danelutto, Marco and De Matteis, Tiziano},
  title    = {Parallel Continuous Preference Queries over Out-of-Order and Bursty Data Streams},
  journal  = {IEEE Transactions on Parallel and Distributed Systems},
  year     = {2017},
  month    = sep,
  volume   = {28},
  number   = {9},
  pages    = {2608--2624},
  issn     = {1045-9219},
  doi      = {10.1109/TPDS.2017.2679197},
  keywords = {data analysis;parallel architectures;query processing;scheduling;bursty data streams;data analytics;decision making;out-of-order arrivals;out-of-order data streams;parallel architectures;parallel continuous preference queries;parallel query model;scheduling strategies;traffic bursts;Computational modeling;Data models;Multicore processing;Out of order;Parallel processing;Real-time systems;Parallelism;burstiness and traffic surges;continuous preference queries;data streams;multicores;out-of-order arrivals;sliding windows},
  abstract = {Techniques to handle traffic bursts and out-of-order arrivals are of paramount importance to provide real-time sensor data analytics in domains like traffic surveillance, transportation management, healthcare and security applications. In these systems the amount of raw data coming from sensors must be analyzed by continuous queries that extract value-added information used to make informed decisions in real-time. To perform this task with timing constraints, parallelism must be exploited in the query execution in order to enable the real-time processing on parallel architectures. In this paper we focus on continuous preference queries, a representative class of continuous queries for decision making, and we propose a parallel query model targeting the efficient processing over out-of-order and bursty data streams. We study how to integrate punctuation mechanisms in order to enable out-of-order processing. Then, we present advanced scheduling strategies targeting scenarios with different burstiness levels, parameterized using the index of dispersion quantity. Extensive experiments have been performed using synthetic datasets and real-world data streams obtained from an existing real-time locating system. The experimental evaluation demonstrates the efficiency of our parallel solution and its effectiveness in handling the out-of-orderness degrees and burstiness levels of real-world applications.},
}

M. Torquati, G. Mencagli, M. Drocco, M. Aldinucci, T. De Matteis, and M. Danelutto, “On dynamic memory allocation in sliding-window parallel patterns for streaming analytics,” The journal of supercomputing, 2017. doi:10.1007/s11227-017-2152-1
[Abstract] [BibTeX] [URL]

@article{jsc17,
  author   = {Torquati, Massimo and Mencagli, Gabriele and Drocco, Maurizio and Aldinucci, Marco and De Matteis, Tiziano and Danelutto, Marco},
  title    = {On dynamic memory allocation in sliding-window parallel patterns for streaming analytics},
  journal  = {The Journal of Supercomputing},
  year     = {2017},
  month    = sep,
  day      = {27},
  issn     = {1573-0484},
  doi      = {10.1007/s11227-017-2152-1},
  url      = {https://doi.org/10.1007/s11227-017-2152-1},
  abstract = {This work studies the issues related to dynamic memory management in Data Stream Processing, an emerging paradigm enabling the real-time processing of live data streams. In this paper, we consider two streaming parallel patterns and we discuss different implementation variants related to how dynamic memory is managed. The results show that the standard mechanisms provided by modern C++ are not entirely adequate for maximizing the performance. Instead, the combined use of an efficient general purpose memory allocator, a custom allocator optimized for the pattern considered and a custom variant of the C++ shared pointer mechanism, provides a performance improvement up to 16{\%} on the best case.},
}

2016

A. Brogi, M. Danelutto, D. De Sensi, A. Ibrahim, J. Soldani, and M. Torquati, “Analysing multiple qos attributes in parallel design patterns-based applications,” International journal of parallel programming, pp. 1-20, 2016. doi:10.1007/s10766-016-0476-8
[Abstract] [BibTeX] [URL] [Download PDF]

@article{pasa:ijpp16,
  author     = {Brogi, Antonio and Danelutto, Marco and De Sensi, Daniele and Ibrahim, Ahmad and Soldani, Jacopo and Torquati, Massimo},
  title      = {Analysing Multiple {QoS} Attributes in Parallel Design Patterns-Based Applications},
  journal    = {International Journal of Parallel Programming},
  year       = {2016},
  month      = nov,
  pages      = {1--20},
  issn       = {1573-7640},
  doi        = {10.1007/s10766-016-0476-8},
  url        = {http://dx.doi.org/10.1007/s10766-016-0476-8},
  openaccess = {http://rdcu.be/yK2u},
  pdf        = {http://pages.di.unipi.it/desensi/assets/pdf/2016_IJPP.pdf},
  abstract   = {Parallel design patterns can be fruitfully combined to develop parallel software applications. Different combinations of patterns can feature different QoS while being functionally equivalent. To support application developers in selecting the best combinations of patterns to develop their applications, we hereby propose a probabilistic approach that permits analysing, at design time, multiple QoS attributes of parallel design patterns-based application. We also present a proof-of-concept implementation of our approach, together with some experimental results.},
}

M. Danelutto, C. Gallicchio, A. Micheli, M. Torquati, and D. Virgilio, “Structured parallel implementation of Tree Echo State Network model selection,” in Parallel Computing: On the Road to Exascale, Munich, Germany, 2016, pp. 145-154.
[BibTeX]

@inproceedings{parco15-virgilio,
  author    = {Danelutto, Marco and Gallicchio, Claudio and Micheli, Alessio and Torquati, Massimo and Virgilio, Daniele},
  title     = {Structured parallel implementation of {Tree Echo State Network} model selection},
  booktitle = {Parallel Computing: On the Road to Exascale},
  series    = {Advances in Parallel Computing},
  editor    = {Joubert, Gerhard R. and Leather, Hugh and Parsons, Mark and Peters, Frans and Sawyer, Mark},
  volume    = {27},
  pages     = {145--154},
  publisher = {IOS Press},
  address   = {Munich, Germany},
  isbn      = {978-1-61499-620-0 (print) | 978-1-61499-621-7 (online)},
  note      = {proceedings of PARCO 2015},
  year      = {2016},
}

M. Danelutto, M. Torquati, and P. Kilpatrick, “State access patterns in embarrassingly parallel computations,” in Proceedings of the HLPGPU 2016 workshop (Co-Located with HiPEAC 2016), Prague, Czech Republic, 2016.
[BibTeX] [URL]

@inproceedings{state:pattern:hlpgpu:15,
  author    = {Danelutto, Marco and Torquati, Massimo and Kilpatrick, Peter},
  title     = {State access patterns in embarrassingly parallel computations},
  booktitle = {Proceedings of the {HLPGPU} 2016 workshop ({Co-Located} with {HiPEAC} 2016), {Prague}, {Czech Republic}},
  month     = jan,
  year      = {2016},
  url       = {https://chrisb.host.cs.st-andrews.ac.uk/HLPGPU_2016_submissions_1-10_2016-01-21_14-36.zip},
}

M. Danelutto, T. De Matteis, G. Mencagli, and M. Torquati, “A divide-and-conquer parallel pattern implementation for multicores,” in Proceedings of the 3rd international workshop on software engineering for parallel systems, New York, NY, USA, 2016, pp. 10-19. doi:10.1145/3002125.3002128
[BibTeX] [URL]

@inproceedings{DAC:2016:SEPS,
  author    = {Danelutto, Marco and De Matteis, Tiziano and Mencagli, Gabriele and Torquati, Massimo},
  title     = {A Divide-and-conquer Parallel Pattern Implementation for Multicores},
  booktitle = {Proceedings of the 3rd International Workshop on Software Engineering for Parallel Systems},
  series    = {SEPS 2016},
  location  = {Amsterdam, Netherlands},
  publisher = {ACM},
  address   = {New York, NY, USA},
  year      = {2016},
  pages     = {10--19},
  numpages  = {10},
  isbn      = {978-1-4503-4641-2},
  doi       = {10.1145/3002125.3002128},
  acmid     = {3002128},
  url       = {http://doi.acm.org/10.1145/3002125.3002128},
  keywords  = {Divide and Conquer, High-level parallel patterns},
}

M. Danelutto and U. Albanese, “Data parallel patterns in Erlang/OpenCL,” in Parallel Computing: On the Road to Exascale, Munich, Germany, 2016, pp. 165-174.
[BibTeX]

@inproceedings{parco15-erlang-opencl,
  author    = {Danelutto, Marco and Albanese, Ugo},
  title     = {Data parallel patterns in {Erlang/OpenCL}},
  booktitle = {Parallel Computing: On the Road to Exascale},
  series    = {Advances in Parallel Computing},
  editor    = {Joubert, Gerhard R. and Leather, Hugh and Parsons, Mark and Peters, Frans and Sawyer, Mark},
  volume    = {27},
  pages     = {165--174},
  publisher = {IOS Press},
  address   = {Munich, Germany},
  isbn      = {978-1-61499-620-0 (print) | 978-1-61499-621-7 (online)},
  note      = {proceedings of PARCO 2015},
  year      = {2016},
}

M. Danelutto, M. Torquati, and P. Kilpatrick, “A DSL based toolchain for design space exploration in structured parallel programming,” Procedia computer science, vol. 80, pp. 1519-1530, 2016. doi:10.1016/j.procs.2016.05.477
[Abstract] [BibTeX] [URL]

@article{Danelutto20161519,
  author   = {Marco Danelutto and Massimo Torquati and Peter Kilpatrick},
  title    = {A {DSL} Based Toolchain for Design Space Exploration in Structured Parallel Programming},
  journal  = {Procedia Computer Science},
  volume   = {80},
  pages    = {1519--1530},
  year     = {2016},
  note     = {International Conference on Computational Science 2016, {ICCS} 2016, 6--8 June 2016, San Diego, California, {USA}},
  issn     = {1877-0509},
  doi      = {10.1016/j.procs.2016.05.477},
  url      = {http://www.sciencedirect.com/science/article/pii/S1877050916309620},
  keywords = {parallel design patterns, DSL, design space exploration, non-functional concerns},
  abstract = {We introduce a DSL based toolchain supporting the design of parallel applications where parallelism is structured after parallel design pattern compositions. A DSL provides the possibility to write high level parallel design pattern expressions representing the structure of parallel applications, to refactor the pattern expressions, to evaluate their non-functional properties (e.g. ideal performance, total parallelism degree, etc.) and finally to generate parallel code ready to be compiled and run on different target architectures. We discuss a proof-of-concept prototype implementation of the proposed toolchain generating FastFlow code and show some preliminary results achieved using the prototype implementation.}
}

M. Danelutto, G. Mencagli, and M. Torquati, “Efficient dynamic memory allocation in data stream processing programs,” in Proc. of 2nd intl. workshop on reengineering for parallelism in heterogeneous parallel platforms (repara), Toulouse, France, 2016, pp. 1181-1188. doi:10.1109/UIC-ATC-ScalCom-CBDCom-IoP-SmartWorld.2016.123
[Abstract] [BibTeX]

@inproceedings{repara_scalcom_16,
  author    = {Marco Danelutto and Gabriele Mencagli and Massimo Torquati},
  title     = {Efficient Dynamic Memory Allocation in Data Stream Processing Programs},
  booktitle = {Proc. of 2nd Intl. Workshop on Reengineering for Parallelism in Heterogeneous Parallel Platforms (RePara)},
  address   = {Toulouse, France},
  publisher = {IEEE},
  pages     = {1181--1188},
  month     = jul,
  year      = {2016},
  doi       = {10.1109/UIC-ATC-ScalCom-CBDCom-IoP-SmartWorld.2016.123},
  keywords  = {fastflow, repara},
  abstract  = {Data Stream Processing is a paradigm enabling the real-time processing of live data streams coming from sources like sensors, financial tickers and social media. The history of the stream is often maintained in sliding windows and analyzed to produce timely notifications to the users. A challenging issue in the development of parallel implementations of such computations is efficient dynamic memory allocation. In this paper we study two parallel patterns for sliding-window computations and we discuss different implementation variants related to how dynamic memory is managed. The results show that the combined use of an efficient general-purpose memory allocator, and of a custom allocator for the pattern considered, results in significant performance optimizations.}
}

M. Danelutto, J. D. Garcia, L. M. Sanchez, R. Sotomayor, and M. Torquati, “Introducing parallelism by using REPARA C++11 attributes,” in Proc. of the 24th euromicro international conference on parallel, distributed, and network-based processing (pdp 2016), Crete, Greece, 2016, pp. 354-358. doi:10.1109/PDP.2016.115
[Abstract] [BibTeX]

@inproceedings{repara:pdp:16,
  author    = {Marco Danelutto and Jose Daniel Garcia and Luis Miguel Sanchez and Rafael Sotomayor and Massimo Torquati},
  title     = {Introducing Parallelism by using {REPARA} {C++11} Attributes},
  booktitle = {Proc. of the 24th Euromicro International Conference on Parallel, Distributed, and Network-Based Processing (PDP 2016)},
  address   = {Crete, Greece},
  publisher = {IEEE},
  pages     = {354--358},
  month     = feb,
  year      = {2016},
  doi       = {10.1109/PDP.2016.115},
  abstract  = {Patterns provide a mechanism to express parallelism at a high level of abstraction and to make easier the transformation of existing legacy applications to target parallel frameworks. That also opens a path for writing new parallel applications. In this paper we introduce the REPARA approach for expressing parallel patterns and transforming the source code to parallelism frameworks. We take advantage of C++11 attributes as a mechanism to introduce annotations and enrich semantic information on valid source code. We also present a methodology for performing transformation of source code that allows to target multiple parallel programming models. Another contribution is a rule based mechanism to transform annotated code to those specific programming models. The REPARA approach requires programmer intervention only to perform initial code annotation while providing speedups that are comparable to those obtained by manual parallelization}
}

M. Danelutto, T. De Matteis, G. Mencagli, and M. Torquati, “Data stream processing via code annotations,” The journal of supercomputing, pp. 1-15, 2016. doi:10.1007/s11227-016-1793-9
[Abstract] [BibTeX] [URL]

@article{js2016,
  author   = {Danelutto, Marco and De Matteis, Tiziano and Mencagli, Gabriele and Torquati, Massimo},
  title    = {Data stream processing via code annotations},
  journal  = {The Journal of Supercomputing},
  pages    = {1--15},
  year     = {2016},
  issn     = {1573-0484},
  doi      = {10.1007/s11227-016-1793-9},
  url      = {http://dx.doi.org/10.1007/s11227-016-1793-9},
  abstract = {Time-to-solution is an important metric when parallelizing existing code. The REPARA approach provides a systematic way to instantiate stream and data parallel patterns by annotating the sequential source code with C++11 attributes. Annotations are automatically transformed in a target
parallel code that uses existing libraries for parallel programming (e.g., FastFlow). In this paper, we apply this approach for the parallelization of a data stream processing application.
The description shows the effectiveness of the approach in easily and quickly prototyping several parallel variants of the sequential code by obtaining good overall performance in terms of both throughput and latency.}
}

T. De Matteis and G. Mencagli, “Keep calm and react with foresight: strategies for low-latency and energy-efficient elastic data stream processing,” in Proceedings of the 21st acm sigplan symposium on principles and practice of parallel programming (ppopp), 2016, p. 13:1–13:12. doi:10.1145/2851141.2851148
[Abstract] [BibTeX] [URL] [Download PDF] [Slides]

@inproceedings{ppopp2016,
  author    = {De Matteis, Tiziano and Mencagli, Gabriele},
  title     = {Keep Calm and React with Foresight: Strategies for Low-Latency and Energy-Efficient Elastic Data Stream Processing},
  booktitle = {Proceedings of the 21st ACM SIGPLAN Symposium on Principles and Practice of Parallel Programming (PPoPP)},
  year      = {2016},
  pages     = {13:1--13:12},
  articleno = {13},
  numpages  = {12},
  location  = {Barcelona, Spain},
  isbn      = {978-1-4503-4092-2},
  doi       = {10.1145/2851141.2851148},
  url       = {http://doi.acm.org/10.1145/2851141.2851148},
  awards    = {The paper has passed the Artifact Evaluation},
  slides    = {https://docs.google.com/presentation/d/1VZ3y3RQDLFi_xA7Rl0Vj1iqBdoerxCMG4y53uMz9Ziw/edit?usp=sharing},
  abstract  = {This paper addresses the problem of designing control strategies for elastic stream processing applications. Elasticity allows applications to rapidly change their configuration (e.g. the number of used resources) on-the-fly, in response to fluctuations of their workload. In this work we face this problem by adopting the Model Predictive Control technique, a control-theoretic method aimed at finding the optimal application configuration along a limited prediction horizon by solving an online optimization problem. Our control strategies are designed to address latency constraints, by using Queueing Theory models, and energy consumption by changing the number of used cores and the CPU frequency through the Dynamic Voltage and Frequency Scaling (DVFS) function of modern multi-core CPUs. The proactive capabilities, in addition to the latency- and energy-awareness, represent the novel features of our approach. Experiments performed using a high-frequency trading application show the effectiveness compared with state-of-the-art techniques.}
}

T. De Matteis, “Parallel patterns for adaptive data stream processing,” PhD Thesis, 2016.
[BibTeX] [URL]

@phdthesis{dematteis_phd16,
  author = {De Matteis, Tiziano},
  title  = {Parallel Patterns for Adaptive Data Stream Processing},
  school = {University of Pisa},
  year   = {2016},
  url    = {https://etd.adm.unipi.it/theses/available/etd-09152016-145603/unrestricted/PhdThesis_DeMatteis.pdf}
}

T. De Matteis, S. Di Girolamo, and G. Mencagli, “Continuous skyline queries on multicore architectures,” Concurrency and computation: practice and experience, vol. 28, iss. 12, pp. 3503-3522, 2016. doi:10.1002/cpe.3866
[Abstract] [BibTeX] [URL]

@article{ccpe2016,
  author   = {De Matteis, Tiziano and Di Girolamo, Salvatore and Mencagli, Gabriele},
  title    = {Continuous Skyline Queries on Multicore Architectures},
  journal  = {Concurrency and Computation: Practice and Experience},
  volume   = {28},
  number   = {12},
  pages    = {3503--3522},
  year     = {2016},
  issn     = {1532-0634},
  doi      = {10.1002/cpe.3866},
  url      = {http://dx.doi.org/10.1002/cpe.3866},
  abstract = {The emergence of real-time decision-making applications in domains like high-frequency trading,
emergency management and service level analysis in communication networks, has led to the definition
of new classes of queries. Skyline queries are a notable example. Their results consist of all the tuples whose
attribute vector is not dominated (in the Pareto sense) by one of any other tuple. Because of their popularity,
skyline queries have been studied in terms of both sequential algorithms and parallel implementations for
multiprocessors and clusters. Within the Data Stream Processing paradigm, traditional database queries
on static relations have been revised in order to operate on continuous data streams. Most of the past
papers propose sequential algorithms for continuous skyline queries, whereas there exist very few works
targeting implementations on parallel machines. This paper contributes to fill this gap by proposing a parallel
implementation for multicore architectures. We propose: i) a parallelization of the eager algorithm based on
the notion of Skyline Influence Time, ii) optimizations of the reduce phase and load-balancing strategies to
achieve near-optimal speedup, iii) a set of experiments with both synthetic benchmarks and a real dataset in
order to show our implementation effectiveness}
}

D. De Sensi, “Predicting performance and power consumption of parallel applications,” in Proceedings of 24th euromicro international conference on parallel, distributed, and network-based processing (PDP), 2016, pp. 200-207. doi:10.1109/PDP.2016.41
[Abstract] [BibTeX] [URL] [Download PDF] [Slides]

@inproceedings{models:pdp:16,
  author    = {De Sensi, Daniele},
  title     = {Predicting Performance and Power Consumption of Parallel Applications},
  booktitle = {Proceedings of 24th Euromicro International Conference on Parallel, Distributed, and Network-Based Processing ({PDP})},
  pages     = {200--207},
  month     = feb,
  year      = {2016},
  doi       = {10.1109/PDP.2016.41},
  url       = {http://ieeexplore.ieee.org/document/7445331/},
  pdf       = {http://pages.di.unipi.it/desensi/assets/pdf/2016_PDP.pdf},
  slides    = {https://drive.google.com/open?id=1tl5DcBySHfA8A7eQhpxykiqofOCVee308VDAomUTHLQ},
  keywords  = {Power-aware computing; Regression analysis; PARSEC benchmark; Control knobs; DVFS; Predictive models; Concurrency throttling;},
  abstract  = {Current architectures provide many control knobs for the reduction of power consumption of applications, like reducing the number of used cores or scaling down their frequency. However, choosing the right values for these knobs in order to satisfy requirements on performance and/or power consumption is a complex task and trying all the possible combinations of these values is an unfeasible solution since it would require too much time. For this reasons, there is the need for techniques that allow an accurate estimation of the performance and power consumption of an application when a specific configuration of the control knobs values is used. Usually, this is done by executing the application with different configurations and by using these information to predict its behaviour when the values of the knobs are changed. However, since this is a time consuming process, we would like to execute the application in the fewest number of configurations possible. In this work, we consider as control knobs the number of cores used by the application and the frequency of these cores. We show that on most Parsec benchmark programs, by executing the application in 1% of the total possible configurations and by applying a multiple linear regression model we are able to achieve an average accuracy of 96% in predicting its execution time and power consumption in all the other possible knobs combinations.}
}

D. De Sensi, M. Torquati, and M. Danelutto, “A reconfiguration algorithm for power-aware parallel applications,” Acm transactions on architecture and code optimization, vol. 13, iss. 4, p. 43:1–43:25, 2016. doi:10.1145/3004054
[Abstract] [BibTeX] [URL] [Download PDF] [Slides]

@article{nornir:taco16,
  author     = {De Sensi, Daniele and Torquati, Massimo and Danelutto, Marco},
  title      = {A Reconfiguration Algorithm for Power-Aware Parallel Applications},
  journal    = {ACM Transactions on Architecture and Code Optimization},
  volume     = {13},
  number     = {4},
  pages      = {43:1--43:25},
  articleno  = {43},
  numpages   = {25},
  month      = dec,
  year       = {2016},
  issue_date = {December 2016},
  publisher  = {ACM},
  address    = {New York, NY, USA},
  issn       = {1544-3566},
  doi        = {10.1145/3004054},
  url        = {http://doi.acm.org/10.1145/3004054},
  acmid      = {3004054},
  openaccess = {http://dl.acm.org/authorize?N34888},
  pdf        = {http://pages.di.unipi.it/desensi/assets/pdf/2016_TACO.pdf},
  slides     = {https://docs.google.com/presentation/d/1-3zD8pe75mWFA9vbvNgKhwqVMg7jC8ari34inUQwEyE/edit?usp=sharing},
  videopitch = {https://www.youtube.com/watch?v=ckm-Zu8cLdA&feature=youtu.be},
  keywords   = {DVFS, Power-aware computing, dynamic concurrency throttling, multi-core, online learning, power capping, self-adaptive runtime},
  abstract   = {In current computing systems, many applications require guarantees on their maximum power consumption to not exceed the available power budget. On the other hand, for some applications, it could be possible to decrease their performance, yet maintaining an acceptable level, in order to reduce their power consumption. To provide such guarantees, a possible solution consists in changing the number of cores assigned to the application, their clock frequency and the placement of application threads over the cores. However, power consumption and performance have different trends depending on the application considered and on its input. Finding a configuration of resources satisfying user requirements is in the general case a challenging task.
In this paper we propose Nornir, an algorithm to automatically derive, without relying on historical data about previous executions, performance and power consumption models of an application in different configurations. By using these models, we are able to select a close to optimal configuration for the given user requirement, either performance or power consumption. The configuration of the application will be changed on-the-fly throughout the execution to adapt to workload fluctuations, external interferences and/or application's phase changes. We validate the algorithm by simulating it over the applications of the PARSEC benchmark suite. Then, we implement our algorithm and we analyse its accuracy and overhead over some of these applications on a real execution environment. Eventually, we compare the quality of our proposal with that of the optimal algorithm and of some state of the art solutions.}
}

D. De Sensi, M. Danelutto, and M. Torquati, “Power aware reconfigurations of parallel applications,” in Advanced computer architecture and compilation for high-performance and embedded systems (ACACES) — Poster abstracts, Fiuggi, Italy, 2016, pp. 141-144.
[Abstract] [BibTeX] [Download PDF]

@inproceedings{acaces:16,
  author    = {De Sensi, Daniele and Danelutto, Marco and Torquati, Massimo},
  title     = {Power aware reconfigurations of parallel applications},
  booktitle = {Advanced Computer Architecture and Compilation for High-Performance and Embedded Systems ({ACACES}) -- {P}oster abstracts},
  address   = {Fiuggi, Italy},
  publisher = {HiPEAC},
  pages     = {141--144},
  month     = jul,
  year      = {2016},
  isbn      = {978-88-905806-4-2},
  pdf       = {http://pages.di.unipi.it/desensi/assets/pdf/2016_ACACES.pdf},
  poster    = {http://pages.di.unipi.it/desensi/assets/img/2016_ACACES.png},
  keywords  = {power aware; self adaptive; parallel applications;},
  abstract  = {Current architectures provide many possibilities for the reduction of power consumption of applications, such as reducing the number of used cores or scaling down their frequency. However, the amount of resources allocated to an application is usually static and fixed by the programmer or by the runtime. While there are cases where such a static choice may be appropriate, other scenarios may require to dynamically change the amount of resources during the application execution. Choosing the right amount of resources to use in order to satisfy requirements on performance and/or power consumption is a complex task and testing all the possible configurations is an unfeasible solution since it would require too much time. We show some solutions to this problem that, by acting on the number of cores used by the application an on the frequency of these cores are able to provide guarantees on maximum power consumption or on a minimum performance level. We then outline the main results achieved by applying these techniques to some real applications.}
}

M. F. Dolz, D. del Rio Astorga, J. Fernández, D. J. García, F. García-Carballeira, M. Danelutto, and M. Torquati, “Embedding semantics of the single-producer/single-consumer lock-free queue into a race detection tool,” in Proceedings of the 7th international workshop on programming models and applications for multicores and manycores, New York, NY, USA, 2016, pp. 20-29. doi:10.1145/2883404.2883406
[BibTeX] [URL]

@inproceedings{16:PMAM:SPSC,
  author    = {Dolz, Manuel F. and del Rio Astorga, David and Fern{\'a}ndez, Javier and Garc{\'\i}a, J. Daniel and Garc{\'\i}a-Carballeira, F{\'e}lix and Danelutto, Marco and Torquati, Massimo},
  title     = {Embedding Semantics of the Single-Producer/Single-Consumer Lock-Free Queue into a Race Detection Tool},
  booktitle = {Proceedings of the 7th International Workshop on Programming Models and Applications for Multicores and Manycores},
  series    = {PMAM'16},
  pages     = {20--29},
  numpages  = {10},
  year      = {2016},
  publisher = {ACM},
  address   = {New York, NY, USA},
  location  = {Barcelona, Spain},
  isbn      = {978-1-4503-4196-7},
  doi       = {10.1145/2883404.2883406},
  url       = {http://doi.acm.org/10.1145/2883404.2883406},
  acmid     = {2883406},
  keywords  = {Data race detectors, Parallel programming, Semantics, Wait-/lock-free parallel structures}
}

D. Griebler, M. Danelutto, M. Torquati, and L. G. Fernandes, “An Embedded C++ Domain-Specific Language for Stream Parallelism,” in Parallel Computing: On the Road to Exascale, Munich, Germany, 2016, pp. 317-326.
[BibTeX]

@inproceedings{parco15-dalvan,
  author    = {Griebler, Dalvan and Danelutto, Marco and Torquati, Massimo and Fernandes, Luiz Gustavo},
  title     = {An Embedded {C++} Domain-Specific Language for Stream Parallelism},
  booktitle = {{Parallel Computing: On the Road to Exascale}},
  series    = {{Advances in Parallel Computing}},
  editor    = {Joubert, Gerhard R. and Leather, Hugh and Parsons, Mark and Peters, Frans and Sawyer, Mark},
  volume    = 27,
  pages     = {317--326},
  publisher = {IOS Press},
  address   = {Munich, Germany},
  isbn      = {978-1-61499-620-0 (print) | 978-1-61499-621-7 (online)},
  note      = {proceedings of PARCO 2015},
  year      = 2016
}

V. Janjic, C. Brown, K. MacKenzie, K. Hammond, M. Danelutto, M. Aldinucci, and J. D. Garcia, “RPL: a domain-specific language for designing and implementing parallel C++ applications,” in Proc. of intl. euromicro pdp 2016: parallel distributed and network-based processing, Crete, Greece, 2016. doi:10.1109/PDP.2016.97
[Abstract] [BibTeX] [Download PDF]

@inproceedings{rpl:pdp:16,
  author     = {Vladimir Janjic and Christopher Brown and Kenneth MacKenzie and Kevin Hammond and Marco Danelutto and Marco Aldinucci and Jose Daniel Garcia},
  title      = {{RPL}: A Domain-Specific Language for Designing and Implementing Parallel {C++} Applications},
  booktitle  = {Proc. of Intl. Euromicro PDP 2016: Parallel Distributed and network-based Processing},
  address    = {Crete, Greece},
  publisher  = {IEEE},
  year       = {2016},
  doi        = {10.1109/PDP.2016.97},
  pdf        = {http://calvados.di.unipi.it/storage/paper_files/2016_pdp_rpl.pdf},
  bdsk-url-1 = {http://calvados.di.unipi.it/storage/paper_files/2016_pdp_rpl.pdf},
  abstract   = {Parallelising sequential applications is usually a very hard job, due to many different ways in which an application can be parallelised and a large number of programming models (each with its own advantages and disadvantages) that can be used. In this paper, we describe a method to semi-automatically generate and evaluate different parallelisations of the same application, allowing programmers to find the best parallelisation without significant manual reengineering of the code. We describe a novel, high-level domain-specific language, Refactoring Pattern Language (RPL), that is used to represent the parallel structure of an application and to capture its extra-functional properties (such as service time). We then describe a set of RPL rewrite rules that can be used to generate alternative, but semantically equivalent, parallel structures (parallelisations) of the same application. We also describe the RPL Shell that can be used to evaluate these parallelisations, in terms of the desired extra-functional properties. Finally, we describe a set of C++ refactorings, targeting OpenMP, Intel TBB and FastFlow parallel programming models, that semi-automatically apply the desired parallelisation to the application's source code, therefore giving a parallel version of the code. We demonstrate how the RPL and the refactoring rules can be used to derive efficient parallelisations of two realistic C++ use cases (Image Convolution and Ant Colony Optimisation).}
}

2015

M. Aldinucci, A. Bracciali, T. Marschall, M. Patterson, N. Pisanti, and M. Torquati, “High-performance haplotype assembly,” in Proc. of the 11th intl. meeting on computational intelligence methods for bioinformatics and biostatistics (cibb 2014), Cambridge, UK, 2015.
[Abstract] [BibTeX] [Download PDF]

@inproceedings{14:ff:whatsapp:cibb,
  author        = {Marco Aldinucci and Andrea Bracciali and Tobias Marschall and Murray Patterson and Nadia Pisanti and Massimo Torquati},
  title         = {High-Performance Haplotype Assembly},
  booktitle     = {Proc. of the 11th Intl. meeting on Computational Intelligence methods for Bioinformatics and Biostatistics (CIBB 2014)},
  series        = {{LNBI}},
  address       = {Cambridge, UK},
  publisher     = {Springer},
  year          = {2015},
  keywords      = {fastflow, bioinformatics},
  pdf           = {http://calvados.di.unipi.it/storage/paper_files/2014_pHaplo_cibb.pdf},
  bdsk-url-1    = {http://calvados.di.unipi.it/storage/paper_files/2014_pHaplo_cibb.pdf},
  date-added    = {2014-12-01 23:07:21 +0000},
  date-modified = {2015-08-28 17:05:56 +0000},
  abstract      = {The problem of Haplotype Assembly is an essential step in human genome analysis. It is typically formalised as the Minimum Error Correction (MEC) problem which is NP-hard. MEC has been approached using heuristics, integer linear programming, and fixed-parameter tractability (FPT), including approaches whose runtime is exponential in the length of the DNA fragments obtained by the sequencing process. Technological improvements are currently increasing fragment length, which drastically elevates computational costs for such methods. We present pWhatsHap, a multi-core parallelisation of WhatsHap, a recent FPT optimal approach to MEC. WhatsHap moves complexity from fragment length to fragment overlap and is hence of particular interest when considering sequencing technology's current trends. pWhatsHap further improves the efficiency in solving the MEC problem, as shown by experiments performed on datasets with high coverage.}
}

M. Aldinucci, G. Peretti Pezzi, M. Drocco, C. Spampinato, and M. Torquati, “Parallel visual data restoration on multi-GPGPUs using stencil-reduce pattern,” International journal of high performance computing applications, 2015. doi:10.1177/1094342014567907
[Abstract] [BibTeX] [URL] [Download PDF]

@article{ff:denoiser:ijhpca:15,
  author        = {Marco Aldinucci and Guilherme {Peretti Pezzi} and Maurizio Drocco and Concetto Spampinato and Massimo Torquati},
  title         = {Parallel Visual Data Restoration on Multi-{GPGPUs} using Stencil-Reduce Pattern},
  journal       = {International Journal of High Performance Computing Applications},
  year          = {2015},
  doi           = {10.1177/1094342014567907},
  url           = {http://dx.doi.org/10.1177/1094342014567907},
  keywords      = {fastflow, paraphrase, impact, nvidia},
  pdf           = {http://calvados.di.unipi.it/storage/paper_files/2015_ff_stencilreduce_ijhpca.pdf},
  bdsk-url-1    = {http://calvados.di.unipi.it/storage/paper_files/2015_ff_stencilreduce_ijhpca.pdf},
  date-added    = {2014-08-23 00:06:10 +0000},
  date-modified = {2015-09-24 11:21:20 +0000},
  abstract      = {In this paper, a highly effective parallel filter for visual data restoration is presented. The filter is designed following a skeletal approach, using a newly proposed stencil-reduce, and has been implemented by way of the FastFlow parallel programming library. As a result of its high-level design, it is possible to run the filter seamlessly on a multicore machine, on multi-GPGPUs, or on both. The design and implementation of the filter are discussed, and an experimental evaluation is presented.}
}

M. Aldinucci, M. Danelutto, M. Drocco, P. Kilpatrick, G. Peretti Pezzi, and M. Torquati, “The loop-of-stencil-reduce paradigm,” in Proc. of intl. workshop on reengineering for parallelism in heterogeneous parallel platforms (repara), Helsinki, Finland, 2015, pp. 172-177. doi:10.1109/Trustcom.2015.628
[Abstract] [BibTeX] [Download PDF]

@inproceedings{opencl:ff:ispa:15,
  author        = {Marco Aldinucci and Marco Danelutto and Maurizio Drocco and Peter Kilpatrick and Guilherme {Peretti Pezzi} and Massimo Torquati},
  title         = {The Loop-of-Stencil-Reduce paradigm},
  booktitle     = {Proc. of Intl. Workshop on Reengineering for Parallelism in Heterogeneous Parallel Platforms (RePara)},
  address       = {Helsinki, Finland},
  publisher     = {IEEE},
  pages         = {172--177},
  month         = aug,
  year          = {2015},
  doi           = {10.1109/Trustcom.2015.628},
  keywords      = {fastflow, repara, nvidia},
  pdf           = {http://calvados.di.unipi.it/storage/paper_files/2015_RePara_ISPA.pdf},
  bdsk-url-1    = {http://calvados.di.unipi.it/storage/paper_files/2015_RePara_ISPA.pdf},
  date-added    = {2015-07-05 09:48:33 +0000},
  date-modified = {2015-09-24 11:14:56 +0000},
  abstract      = {In this paper we advocate the Loop-of-stencil-reduce pattern as a way to simplify the parallel programming of heterogeneous platforms (multicore+GPUs). Loop-of-Stencil-reduce is general enough to subsume map, reduce, map-reduce, stencil, stencil-reduce, and, crucially, their usage in a loop. It transparently targets (by using OpenCL) combinations of CPU cores and GPUs, and it makes it possible to simplify the deployment of a single stencil computation kernel on different GPUs. The paper discusses the implementation of Loop-of-stencil-reduce within the FastFlow parallel framework, considering a simple iterative data-parallel application as running example (Game of Life) and a highly effective parallel filter for visual data restoration to assess performance. Thanks to the high-level design of the Loop-of-stencil-reduce, it was possible to run the filter seamlessly on a multicore machine, on multi-GPUs, and on both.}
}

M. Aldinucci, S. Campa, M. Danelutto, P. Kilpatrick, and M. Torquati, “Pool evolution: a parallel pattern for evolutionary and symbolic computing,” International journal of parallel programming, pp. 1-21, 2015. doi:10.1007/s10766-015-0358-5
[Abstract] [BibTeX] [Download PDF]

@article{pool:ijpp:15,
  author        = {Marco Aldinucci and Sonia Campa and Marco Danelutto and Peter Kilpatrick and Massimo Torquati},
  title         = {Pool Evolution: A Parallel Pattern for Evolutionary and Symbolic Computing},
  journal       = {International Journal of Parallel Programming},
  pages         = {1--21},
  month         = mar,
  year          = {2015},
  publisher     = {Springer US},
  issn          = {0885-7458},
  doi           = {10.1007/s10766-015-0358-5},
  keywords      = {fastflow, paraphrase, repara},
  pdf           = {http://calvados.di.unipi.it/storage/paper_files/2015_ff_pool_ijpp.pdf},
  bdsk-url-1    = {http://calvados.di.unipi.it/storage/paper_files/2015_ff_pool_ijpp.pdf},
  bdsk-url-2    = {http://dx.doi.org/10.1007/s10766-015-0358-5},
  date-added    = {2015-03-21 22:15:47 +0000},
  date-modified = {2015-09-24 11:15:53 +0000},
  abstract      = {We introduce a new parallel pattern derived from a specific application domain and show how it turns out to have application beyond its domain of origin. The pool evolution pattern models the parallel evolution of a population subject to mutations and evolving in such a way that a given fitness function is optimized. The pattern has been demonstrated to be suitable for capturing and modeling the parallel patterns underpinning various evolutionary algorithms, as well as other parallel patterns typical of symbolic computation. In this paper we introduce the pattern, we discuss its implementation on modern multi/many core architectures and finally present experimental results obtained with FastFlow and Erlang implementations to assess its feasibility and scalability.}
}

M. Danelutto, M. Torquati, and P. Kilpatrick, “A Green Perspective on Structured Parallel Programming,” in 23rd Euromicro International Conference on Parallel, Distributed, and Network-Based Processing, PDP 2015, Turku, Finland, March 4-6, 2015, 2015, pp. 430-437. doi:10.1109/PDP.2015.116
[Abstract] [BibTeX] [URL]

@inproceedings{green:perspective:turku:16,
  author    = {Marco Danelutto and
               Massimo Torquati and
               Peter Kilpatrick},
  title     = {A Green Perspective on Structured Parallel Programming},
  booktitle = {{23rd Euromicro International Conference on Parallel, Distributed, and Network-Based Processing, {PDP} 2015, Turku, Finland, March 4-6, 2015}},
  editor    = {Masoud Daneshtalab and
               Marco Aldinucci and
               Ville Lepp{\"{a}}nen and
               Johan Lilius and
               Mats Brorsson},
  pages     = {430--437},
  year      = {2015},
  publisher = {{IEEE} Computer Society},
  isbn      = {978-1-4799-8491-6},
  doi       = {10.1109/PDP.2015.116},
  url       = {http://dx.doi.org/10.1109/PDP.2015.116},
  timestamp = {Fri, 08 Apr 2016 13:26:54 +0200},
  biburl    = {http://dblp.uni-trier.de/rec/bib/conf/pdp/DaneluttoTK15},
  bibsource = {dblp computer science bibliography, http://dblp.org},
  abstract  = {Structured parallel programming, and in particular programming models using the algorithmic skeleton or parallel design pattern concepts, are increasingly considered to be the only viable means of supporting effective development of scalable and efficient parallel programs. Structured parallel programming models have been assessed in a number of works in the context of performance. In this paper we consider how the use of structured parallel programming models allows knowledge of the parallel patterns present to be harnessed to address both performance and energy consumption. We consider different features of structured parallel programming that may be leveraged to impact the performance/energy trade-off and we discuss a preliminary set of experiments validating our claims.}
}

M. Danelutto, D. De Sensi, and M. Torquati, “Energy driven adaptivity in stream parallel computations,” in Proceedings of 23rd euromicro international conference on parallel, distributed, and network-based processing (PDP), Turku, Finland, 2015, pp. 103-110. doi:10.1109/PDP.2015.92
[Abstract] [BibTeX] [URL] [Download PDF] [Slides]

@InProceedings{ff:energy:pdp:15,
author = {Danelutto, Marco and De Sensi, Daniele and Torquati, Massimo},
title = {Energy driven adaptivity in stream parallel computations},
booktitle = {Proceedings of 23rd {Euromicro} International Conference on Parallel, Distributed, and Network-Based Processing ({PDP})},
year = {2015},
address = {Turku, Finland},
publisher = {IEEE},
pages = {103--110},
doi = {10.1109/PDP.2015.92},
ISSN = {1066-6192},
pdf = {http://pages.di.unipi.it/desensi/assets/pdf/2015_PDP.pdf},
url = {http://ieeexplore.ieee.org/document/7092707/},
date-added = {2015-02-28 10:59:38 +0000},
date-modified = {2015-02-28 11:01:23 +0000},
keywords = {fastflow, stream parallel, energy consumption, power-aware computing},
abstract = {Determining the right amount of resources needed for a given computation is a critical problem. In many cases, computing systems are configured to use an amount of resources to manage high load peaks even though this cause energy waste when the resources are not fully utilised. To avoid this problem, adaptive approaches are used to dynamically increase/decrease computational resources depending on the real needs. A different approach based on Dynamic Voltage and Frequency Scaling (DVFS) is emerging as a possible alternative solution to reduce energy consumption of idle CPUs by lowering their frequencies. In this work, we propose to tackle the problem in stream parallel computations by using both the classic adaptivity concepts and the possibility provided by modern CPUs to dynamically change their frequency. We validate our approach showing a real network application that performs Deep Packet Inspection over network traffic. We are able to manage bandwidth changing over time, guaranteeing minimal packet loss during reconfiguration and minimal energy consumption.},
slides = {https://drive.google.com/open?id=1b9am66LlxC4b1tKtocbh_dv7BMxCbVNoO-_lYKDcprc},
}

M. Danelutto, T. De Matteis, G. Mencagli, and M. Torquati, “Parallelizing high-frequency trading applications by using C++11 attributes,” in Proc. of intl. workshop on reengineering for parallelism in heterogeneous parallel platforms (repara), Helsinki, Finland, 2015, pp. 140-147. doi:10.1109/Trustcom.2015.623
[Abstract] [BibTeX]

@inproceedings{repara_ispa_15,
Abstract = {With the wide diffusion of parallel architectures parallelism has become an indispensable factor in the application design. However, the cost of the parallelization process of existing applications is still too high in terms of time-to-development, and often requires a large effort and expertise by the programmer. The REPARA methodology consists in a systematic way to express parallel patterns by annotating the source code using C++11 attributes transformed automatically in a target parallel code based on parallel programming libraries (e.g. FastFlow, Intel TBB). In this paper we apply this approach in the parallelization of a real high-frequency trading application. The description shows the effectiveness of the approach in easily prototyping several parallel variants of the same code. We also propose an extension of a REPARA attribute to express a user-defined scheduling strategy, which makes it possible to design a high-throughput and low-latency parallelization of our code outperforming the other parallel variants in most of the considered test-cases.},
Address = {Helsinki, Finland},
Author = {Danelutto, Marco and De Matteis, Tiziano and Mencagli, Gabriele and Torquati, Massimo},
Booktitle = {Proc. of Intl. Workshop on Reengineering for Parallelism in Heterogeneous Parallel Platforms ({RePara})},
Doi = {10.1109/Trustcom.2015.623},
Keywords = {fastflow, repara},
Month = aug,
Pages = {140--147},
Publisher = {IEEE},
Title = {Parallelizing High-Frequency Trading Applications by using {C++11} Attributes},
Year = {2015}
}

M. Danelutto and M. Torquati, “Structured parallel programming with "core" fastflow,” in Central european functional programming school, V. Zsók, Z. Horváth, and L. Csató, Eds., Springer, 2015, vol. 8606, pp. 29-75. doi:10.1007/978-3-319-15940-9_2
[Abstract] [BibTeX] [URL]

@incollection{tutorial:ff:15,
Abstract = {FastFlow is an open source, structured parallel programming framework originally conceived to support highly efficient stream parallel computation while targeting shared memory multi cores. Its efficiency mainly comes from the optimized implementation of the base communication mechanisms and from its layered design. FastFlow eventually provides the parallel applications programmers with a set of ready-to-use, parametric algorithmic skeletons modeling the most common parallelism exploitation patterns. The algorithmic skeleton provided by FastFlow may be freely nested to model more and more complex parallelism exploitation patterns. This tutorial describes the ``core'' FastFlow, that is the set of skeletons supported since version 1.0 in FastFlow, and outlines the recent advances aimed at (i) introducing new, higher level skeletons and (ii) targeting networked multi cores, possibly equipped with GPUs, in addition to single multi/many core processing elements.},
Author = {Danelutto, Marco and Torquati, Massimo},
Booktitle = {Central {European} Functional Programming School},
Date-Added = {2015-05-07 14:30:40 +0000},
Date-Modified = {2015-09-27 12:12:49 +0000},
Doi = {10.1007/978-3-319-15940-9_2},
Editor = {Zs{\'o}k, Vikt{\'o}ria and Horv{\'a}th, Zolt{\'a}n and Csat{\'o}, Lehel},
Isbn = {978-3-319-15939-3},
Keywords = {fastflow, paraphrase},
Pages = {29--75},
Publisher = {Springer},
Series = {LNCS},
Title = {Structured Parallel Programming with ``core'' {FastFlow}},
Url = {http://dx.doi.org/10.1007/978-3-319-15940-9_2},
Volume = {8606},
Year = {2015},
Bdsk-Url-1 = {http://dx.doi.org/10.1007/978-3-319-15940-9_2}}

T. De Matteis, S. Di Girolamo, and G. Mencagli, “A multicore parallelization of continuous skyline queries on data streams,” in Proceedings of the 2015 international conference on parallel processing (euro-par), Vienna, Austria, 2015, pp. 402-413. doi:10.1007/978-3-662-48096-0_31
[Abstract] [BibTeX] [Slides]

@InProceedings{europar2015,
author = {De Matteis, Tiziano and Di Girolamo, Salvatore and Mencagli, Gabriele},
title = {A Multicore Parallelization of Continuous Skyline Queries on Data Streams},
booktitle = {Proceedings of the 2015 International Conference on Parallel Processing ({Euro-Par})},
year = {2015},
pages = {402--413},
address = {Vienna, Austria},
abstract = {Skyline queries are a relevant example of preference queries frequently used in multi-criteria decision making to retrieve interesting points from large datasets. They return the points whose attribute vector is not dominated by any other point. Due to their importance in real-time scenarios, skyline queries have been studied both in terms of sequential algorithms and parallel implementations for multiprocessors and clusters. Recently, with the advent of the Data Stream Processing paradigm, skyline queries have been computed over continuous data streams according to the sliding window model. Although sequential algorithms have been proposed for continuous skyline queries, few works targeting modern parallel architectures exist. This paper contributes to the current literature by proposing a parallel implementation on multicores. We provide a description of our parallelization by focusing on the cooperation pattern between parallel functionalities, optimizations related to the reduce phase, and load-balancing strategies. Finally, we show experiments using different point distributions, arrival rates and window lengths.},
doi = {10.1007/978-3-662-48096-0_31},
isbn = {978-3-662-48095-3},
slides = {https://docs.google.com/presentation/d/1JQVn9QnLC15e_MhmNOttP3mohray_sulAO532PJqOy4/edit?usp=sharing}
}

2014

M. Aldinucci, S. Ruggieri, and M. Torquati, “Decision tree building on multi-core using fastflow,” Concurrency and computation: practice and experience, vol. 26, iss. 3, pp. 800-820, 2014. doi:10.1002/cpe.3063
[Abstract] [BibTeX] [Download PDF]

@article{yadtff:ccpe:13,
Abstract = {The whole computer hardware industry embraced multi-core. The extreme optimisation of sequential algorithms is then no longer sufficient to squeeze the real machine power, which can be only exploited via thread-level parallelism. Decision tree algorithms exhibit natural concurrency that makes them suitable to be parallelised. This paper presents an in-depth study of the parallelisation of an implementation of the C4.5 algorithm for multi-core architectures. We characterise elapsed time lower bounds for the forms of parallelisations adopted, and achieve close to optimal performances. Our implementation is based on the FastFlow parallel programming environment and it requires minimal changes to the original sequential code.},
Author = {Marco Aldinucci and Salvatore Ruggieri and Massimo Torquati},
Date-Added = {2014-12-21 17:46:33 +0000},
Date-Modified = {2015-09-27 12:17:52 +0000},
Doi = {10.1002/cpe.3063},
Journal = {Concurrency and Computation: Practice and Experience},
Keywords = {fastflow, paraphrase},
Number = {3},
Pages = {800--820},
Title = {Decision Tree Building on Multi-Core using {FastFlow}},
pdf = {http://calvados.di.unipi.it/storage/paper_files/2013_yadtff_ccpe.pdf},
Volume = {26},
Year = {2014},
Bdsk-Url-1 = {http://calvados.di.unipi.it/storage/paper_files/2013_yadtff_ccpe.pdf},
Bdsk-Url-2 = {http://dx.doi.org/10.1002/cpe.3063}}

M. Aldinucci, M. Torquati, M. Drocco, G. Peretti Pezzi, and C. Spampinato, “An overview of fastflow: combining pattern-level abstraction and efficiency in GPGPUs,” in Gpu technology conference (gtc 2014), San Jose, CA, USA, 2014.
[Abstract] [BibTeX] [Download PDF]

@inproceedings{ff:gtc:2014:short,
Abstract = {Get an overview of FastFlow's parallel patterns can be used to design parallel applications for execution on both CPUs and GPGPUs while avoiding most of the complex low-level detail needed to make them efficient, portable and rapid to prototype. For a more detailed and technical review of FastFlow's parallel patterns as well as a use case where we will show the design and effectiveness of a novel universal image filtering template based on the variational approach.},
Address = {San Jose, CA, USA},
Author = {Marco Aldinucci and Massimo Torquati and Maurizio Drocco and Guilherme {Peretti Pezzi} and Concetto Spampinato},
Booktitle = {{GPU} Technology Conference ({GTC} 2014)},
Date-Added = {2014-04-13 23:20:52 +0000},
Date-Modified = {2015-09-27 12:15:33 +0000},
Keywords = {fastflow, gpu, nvidia, impact, paraphrase, submitted},
Month = mar,
Title = {An Overview of {FastFlow}: Combining Pattern-Level Abstraction and Efficiency in {GPGPUs}},
pdf = {http://calvados.di.unipi.it/storage/talks/2014_S4585-Marco-Aldinucci.pdf},
Year = {2014},
Bdsk-Url-1 = {http://calvados.di.unipi.it/storage/talks/2014_S4585-Marco-Aldinucci.pdf}}

M. Aldinucci, S. Campa, M. Danelutto, P. Kilpatrick, and M. Torquati, “Design patterns percolating to parallel programming framework implementation,” International journal of parallel programming, vol. 42, iss. 6, pp. 1012-1031, 2014. doi:10.1007/s10766-013-0273-6
[Abstract] [BibTeX] [Download PDF]

@article{ijpp:patterns:13,
Abstract = {Structured parallel programming is recognised as a viable and effective means of tackling parallel programming problems. Recently, a set of simple and powerful parallel building blocks (RISC-pb2l) has been proposed to support modelling and implementation of parallel frameworks. In this work we demonstrate how that same parallel building block set may be used to model both general purpose parallel programming abstractions, not usually listed in classical skeleton sets, and more specialized domain specific parallel patterns. We show how an implementation of RISC-pb2l can be realised via the FastFlow framework and present experimental evidence of the feasibility and efficiency of the approach.},
Author = {Marco Aldinucci and Sonia Campa and Marco Danelutto and Peter Kilpatrick and Massimo Torquati},
Date-Added = {2014-12-21 17:47:21 +0000},
Date-Modified = {2015-09-27 12:32:37 +0000},
Doi = {10.1007/s10766-013-0273-6},
Issn = {0885-7458},
Journal = {International Journal of Parallel Programming},
Keywords = {fastflow, paraphrase},
Number = {6},
Pages = {1012--1031},
Title = {Design patterns percolating to parallel programming framework implementation},
pdf = {http://calvados.di.unipi.it/storage/paper_files/2013_ijpp_patterns-web.pdf},
Volume = {42},
Year = {2014},
Bdsk-Url-1 = {http://calvados.di.unipi.it/storage/paper_files/2013_ijpp_patterns.pdf},
Bdsk-Url-2 = {http://dx.doi.org/10.1007/s10766-013-0273-6},
Bdsk-Url-3 = {http://calvados.di.unipi.it/storage/paper_files/2013_ijpp_patterns-web.pdf}}

M. Aldinucci, M. Danelutto, P. Kilpatrick, and M. Torquati, “Fastflow: high-level and efficient streaming on multi-core,” in Programming multi-core and many-core computing systems, S. Pllana and F. Xhafa, Eds., Wiley, 2014.
[Abstract] [BibTeX] [Download PDF]

@incollection{ff:wileybook:14,
Abstract = {A FastFlow short tutorial},
Author = {Marco Aldinucci and Marco Danelutto and Peter Kilpatrick and Massimo Torquati},
Booktitle = {Programming Multi-core and Many-core Computing Systems},
Chapter = {13},
Date-Added = {2011-06-18 18:28:00 +0200},
Date-Modified = {2014-12-31 14:14:28 +0000},
Editor = {Sabri Pllana and Fatos Xhafa},
Isbn = {0470936908},
Keywords = {fastflow},
Month = oct,
Publisher = {Wiley},
Series = {Parallel and Distributed Computing},
Title = {{FastFlow}: high-level and efficient streaming on multi-core},
pdf = {http://calvados.di.unipi.it/storage/paper_files/2011_FF_tutorial-draft.pdf},
Year = {2014},
Bdsk-Url-1 = {http://calvados.di.unipi.it/storage/paper_files/2011_FF_tutorial-draft.pdf}}

M. Aldinucci, G. Peretti Pezzi, M. Drocco, F. Tordini, P. Kilpatrick, and M. Torquati, “Parallel video denoising on heterogeneous platforms,” in Proc. of intl. workshop on high-level programming for heterogeneous and hierarchical parallel systems (hlpgpu), 2014.
[Abstract] [BibTeX] [Download PDF]

@inproceedings{ff:video:hlpgpu:14,
Abstract = {In this paper, a highly-effective parallel filter for video denoising is presented. The filter is designed using a skeletal approach, and has been implemented by way of the FastFlow parallel programming library. As a result of its high-level design, it is possible to run the filter seamlessly on a multi-core machine, on GPGPU(s), or on both. The design and the implementation of the filter are discussed, and an experimental evaluation is presented. Various mappings of the filtering stages are comparatively discussed.},
Author = {Marco Aldinucci and Guilherme {Peretti Pezzi} and Maurizio Drocco and Fabio Tordini and Peter Kilpatrick and Massimo Torquati},
Booktitle = {Proc. of Intl. Workshop on High-level Programming for Heterogeneous and Hierarchical Parallel Systems ({HLPGPU})},
Date-Added = {2013-12-07 18:28:32 +0000},
Date-Modified = {2015-09-27 12:42:02 +0000},
Keywords = {fastflow, paraphrase, impact},
Title = {Parallel video denoising on heterogeneous platforms},
pdf = {http://calvados.di.unipi.it/storage/paper_files/2014_ff_video_denoiser_hlpgpu.pdf},
Year = {2014},
Bdsk-Url-1 = {http://calvados.di.unipi.it/storage/paper_files/2014_ff_video_denoiser_hlpgpu.pdf}}

M. Aldinucci, M. Torquati, M. Drocco, G. Peretti Pezzi, and C. Spampinato, “Fastflow: combining pattern-level abstraction and efficiency in GPGPUs,” in Gpu technology conference (gtc 2014), San Jose, CA, USA, 2014.
[Abstract] [BibTeX] [Download PDF]

@inproceedings{ff:gtc:2014,
Abstract = {Learn how FastFlow's parallel patterns can be used to design parallel applications for execution on both CPUs and GPGPUs while avoiding most of the complex low-level detail needed to make them efficient, portable and rapid to prototype. As use case, we will show the design and effectiveness of a novel universal image filtering template based on the variational approach.},
Address = {San Jose, CA, USA},
Author = {Marco Aldinucci and Massimo Torquati and Maurizio Drocco and Guilherme {Peretti Pezzi} and Concetto Spampinato},
Booktitle = {{GPU} Technology Conference ({GTC} 2014)},
Date-Added = {2014-04-19 12:52:40 +0000},
Date-Modified = {2015-09-27 12:15:46 +0000},
Keywords = {fastflow, gpu, nvidia, impact, paraphrase, submitted},
Month = mar,
Title = {{FastFlow}: Combining Pattern-Level Abstraction and Efficiency in {GPGPUs}},
pdf = {http://calvados.di.unipi.it/storage/talks/2014_S4729-Marco-Aldinucci.pdf},
Year = {2014},
Bdsk-Url-1 = {http://calvados.di.unipi.it/storage/talks/2014_S4729-Marco-Aldinucci.pdf}}

M. Aldinucci, C. Calcagno, M. Coppo, F. Damiani, M. Drocco, E. Sciacca, S. Spinella, M. Torquati, and A. Troina, “On designing multicore-aware simulators for systems biology endowed with on-line statistics,” Biomed research international, 2014. doi:10.1155/2014/207041
[Abstract] [BibTeX] [Download PDF]

@article{cwcsim:ff:multicore:biomed:14,
Abstract = {The paper arguments are on enabling methodologies for the design of a fully parallel, online, interactive tool aiming to support the bioinformatics scientists. In particular, the features of these methodologies, supported by the FastFlow parallel programming framework, are shown on a simulation tool to perform the modeling, the tuning, and the sensitivity analysis of stochastic biological models. A stochastic simulation needs thousands of independent simulation trajectories turning into big data that should be analysed by statistic and data mining tools. In the considered approach the two stages are pipelined in such a way that the simulation stage streams out the partial results of all simulation trajectories to the analysis stage that immediately produces a partial result. The simulation-analysis workflow is validated for performance and effectiveness of the online analysis in capturing biological systems behavior on a multicore platform and representative proof-of-concept biological systems. The exploited methodologies include pattern-based parallel programming and data streaming that provide key features to the software designers such as performance portability and efficient in-memory (big) data management and movement. Two paradigmatic classes of biological systems exhibiting multistable and oscillatory behavior are used as a testbed.},
Author = {Marco Aldinucci and Cristina Calcagno and Mario Coppo and Ferruccio Damiani and Maurizio Drocco and Eva Sciacca and Salvatore Spinella and Massimo Torquati and Angelo Troina},
Date-Added = {2014-06-26 21:30:32 +0000},
Date-Modified = {2015-09-27 12:17:05 +0000},
Doi = {10.1155/2014/207041},
Journal = {BioMed Research International},
Keywords = {fastflow, bioinformatics, paraphrase, biobits},
Title = {On designing multicore-aware simulators for systems biology endowed with on-line statistics},
pdf = {http://downloads.hindawi.com/journals/bmri/2014/207041.pdf},
Year = {2014},
Bdsk-Url-1 = {http://calvados.di.unipi.it/storage/paper_files/2014_ff_cwc_bmri.pdf},
Bdsk-Url-2 = {http://downloads.hindawi.com/journals/bmri/2014/207041.pdf},
Bdsk-Url-3 = {http://dx.doi.org/10.1155/2014/207041}}

M. Aldinucci, M. Drocco, G. Peretti Pezzi, C. Misale, F. Tordini, and M. Torquati, “Exercising high-level parallel programming on streams: a systems biology use case,” in Proc. of the 2014 ieee 34th intl. conference on distributed computing systems workshops (icdcs), Madrid, Spain, 2014. doi:10.1109/ICDCSW.2014.38
[Abstract] [BibTeX] [Download PDF]

@inproceedings{cwc:gpu:dcperf:14,
Abstract = {The stochastic modelling of biological systems, coupled with Monte Carlo simulation of models, is an increasingly popular technique in Bioinformatics. The simulation-analysis workflow may result into a computationally expensive task reducing the interactivity required in the model tuning. In this work, we advocate high-level software design as a vehicle for building efficient and portable parallel simulators for a variety of platforms, ranging from multi-core platforms to GPGPUs to cloud. In particular, the Calculus of Wrapped Compartments (CWC) parallel simulator for systems biology equipped with on-line mining of results, which is designed according to the FastFlow pattern-based approach, is discussed as a running example. In this work, the CWC simulator is used as a paradigmatic example of a complex C++ application where the quality of results is correlated with both computation and I/O bounds, and where high-quality results might turn into big data. The FastFlow parallel programming framework, which advocates C++ pattern-based parallel programming makes it possible to develop portable parallel code without relinquish neither run-time efficiency nor performance tuning opportunities. Performance and effectiveness of the approach are validated on a variety of platforms, inter-alia cache-coherent multi-cores, cluster of multi-core (Ethernet and Infiniband) and the Amazon Elastic Compute Cloud.},
Address = {Madrid, Spain},
Author = {Marco Aldinucci and Maurizio Drocco and Guilherme {Peretti Pezzi} and Claudia Misale and Fabio Tordini and Massimo Torquati},
Booktitle = {Proc. of the 2014 {IEEE} 34th Intl. Conference on Distributed Computing Systems Workshops ({ICDCS})},
Date-Added = {2014-04-19 12:44:39 +0000},
Date-Modified = {2015-09-27 12:43:13 +0000},
Doi = {10.1109/ICDCSW.2014.38},
Keywords = {fastflow, gpu, bioinformatics, paraphrase, impact, nvidia},
Publisher = {IEEE},
Title = {Exercising high-level parallel programming on streams: a systems biology use case},
pdf = {http://calvados.di.unipi.it/storage/paper_files/2014_dcperf_cwc_gpu.pdf},
Year = {2014},
Bdsk-Url-1 = {http://calvados.di.unipi.it/storage/paper_files/2014_dcperf_cwc_gpu.pdf},
Bdsk-Url-2 = {http://dx.doi.org/10.1109/ICDCSW.2014.38}}

M. Aldinucci, M. Torquati, C. Spampinato, M. Drocco, C. Misale, C. Calcagno, and M. Coppo, “Parallel stochastic systems biology in the cloud,” Briefings in bioinformatics, vol. 15, iss. 5, pp. 798-813, 2014. doi:10.1093/bib/bbt040
[Abstract] [BibTeX] [Download PDF]

@article{cwc:cloud:bib:13,
Abstract = {The stochastic modelling of biological systems, coupled with Monte Carlo simulation of models, is an increasingly popular technique in bioinformatics. The simulation-analysis workflow may result computationally expensive reducing the interactivity required in the model tuning. In this work, we advocate the high-level software design as a vehicle for building efficient and portable parallel simulators for the cloud. In particular, the Calculus of Wrapped Components (CWC) simulator for systems biology, which is designed according to the FastFlow pattern-based approach, is presented and discussed. Thanks to the FastFlow framework, the CWC simulator is designed as a high-level workflow that can simulate CWC models, merge simulation results and statistically analyse them in a single parallel workflow in the cloud. To improve interactivity, successive phases are pipelined in such a way that the workflow begins to output a stream of analysis results immediately after simulation is started. Performance and effectiveness of the CWC simulator are validated on the Amazon Elastic Compute Cloud.},
Author = {Marco Aldinucci and Massimo Torquati and Concetto Spampinato and Maurizio Drocco and Claudia Misale and Cristina Calcagno and Mario Coppo},
Date-Added = {2014-12-21 17:49:54 +0000},
Date-Modified = {2015-09-27 12:33:52 +0000},
Doi = {10.1093/bib/bbt040},
Issn = {1467-5463},
Journal = {Briefings in Bioinformatics},
Keywords = {fastflow, bioinformatics, cloud, paraphrase, impact, biobits},
Number = {5},
Pages = {798--813},
Title = {Parallel stochastic systems biology in the cloud},
pdf = {http://calvados.di.unipi.it/storage/paper_files/2013_ff_bio_cloud_briefings.pdf},
Volume = {15},
Year = {2014},
Bdsk-Url-1 = {http://dx.doi.org/10.1093/bib/bbt040},
Bdsk-Url-2 = {http://calvados.di.unipi.it/storage/paper_files/2013_ff_bio_cloud_briefings.pdf}}

M. Aldinucci, S. Campa, M. Danelutto, P. Kilpatrick, and M. Torquati, “Pool evolution: a domain specific parallel pattern,” in Proc. of the 7th intl. symposium on high-level parallel programming and applications (hlpp), Amsterdam, The Netherlands, 2014.
[Abstract] [BibTeX] [Download PDF]

@inproceedings{2014:ff:pool:hlpp,
Abstract = {We introduce a new parallel pattern derived from a specific application domain and show how it turns out to have application beyond its domain of origin. The pool evolution pattern models the parallel evolution of a population subject to mutations and evolving in such a way that a given fitness function is optimized. The pattern has been demonstrated to be suitable for capturing and modeling the parallel patterns underpinning various evolutionary algorithms, as well as other parallel patterns typical of symbolic computation. In this paper we introduce the pattern, developed in the framework of the ParaPhrase EU-funded FP7 project, we discuss its implementation on modern multi/many core architectures and finally present experimental results obtained with FastFlow and Erlang implementations to assess its feasibility and scalability.},
Address = {Amsterdam, The Netherlands},
Author = {Marco Aldinucci and Sonia Campa and Marco Danelutto and Peter Kilpatrick and Massimo Torquati},
Booktitle = {Proc. of the 7th Intl. Symposium on High-level Parallel Programming and Applications ({HLPP})},
Date-Modified = {2015-09-27 12:14:30 +0000},
Keywords = {fastflow, paraphrase, repara},
Month = jul,
Title = {Pool evolution: a domain specific parallel pattern},
pdf = {http://calvados.di.unipi.it/storage/paper_files/2014_hlpp_pool.pdf},
Year = {2014},
Bdsk-Url-1 = {http://calvados.di.unipi.it/storage/paper_files/2014_hlpp_pool.pdf}}

D. Buono, M. Danelutto, T. De Matteis, G. Mencagli, and M. Torquati, “A lightweight run-time support for fast dense linear algebra on multi-core,” in Proc. of the 12th international conference on parallel and distributed computing and networks (pdcn 2014), 2014.
[BibTeX]

@inproceedings{ff:ffmdf:pdcn:14,
Author = {Buono, Daniele and Danelutto, Marco and De Matteis, Tiziano and Mencagli, Gabriele and Torquati, Massimo},
Booktitle = {Proc. of the 12th International Conference on Parallel and Distributed Computing and Networks ({PDCN} 2014)},
Date-Modified = {2015-02-01 16:49:46 +0000},
Keywords = {fastflow},
Month = feb,
Publisher = {IASTED, ACTA press},
Title = {A Lightweight Run-Time Support For Fast Dense Linear Algebra on Multi-Core},
Year = {2014}}

D. Buono, T. De Matteis, G. Mencagli, and M. Vanneschi, “Optimizing message-passing on multicore architectures using hardware multi-threading,” in Parallel, distributed and network-based processing (pdp), 2014 22nd euromicro international conference on, Torino, Italy, 2014, pp. 262-270. doi:10.1109/PDP.2014.63
[Abstract] [BibTeX]

@InProceedings{pdp2014,
author = {Buono, Daniele and De Matteis, Tiziano and Mencagli, Gabriele and Vanneschi, Marco},
title = {Optimizing Message-Passing on Multicore Architectures Using Hardware Multi-threading},
booktitle = {Parallel, Distributed and Network-Based Processing ({PDP}), 2014 22nd {Euromicro} International Conference on},
year = {2014},
pages = {262--270},
address = {Torino, Italy},
abstract = {Shared-memory and message-passing are two opposite models to develop parallel computations. The shared-memory model, adopted by existing frameworks such as OpenMP, represents a de-facto standard on multi-/many-core architectures. However, message-passing deserves to be studied for its inherent properties in terms of portability and flexibility as well as for its better ease of debugging. Achieving good performance from the use of messages in shared-memory architectures requires an efficient implementation of the run-time support. This paper investigates the definition of a delegation mechanism on multi-threaded architectures able to: (i) overlap communications with calculation phases; (ii) parallelize distribution and collective operations. Our ideas have been exemplified using two parallel benchmarks on the Intel Phi, showing that in these applications our message-passing support outperforms MPI and reaches similar performance compared to standard OpenMP implementations.},
doi = {10.1109/PDP.2014.63},
issn = {1066-6192}
}

D. Buono, T. De Matteis, and G. Mencagli, “A high-throughput and low-latency parallelization of window-based stream joins on multicores,” in 12th ieee international symposium on parallel and distributed processing with applications, Milano, Italy, 2014, pp. 117-126. doi:10.1109/ISPA.2014.24
[BibTeX] [URL]

@INPROCEEDINGS{ispa2014,
author = {Buono, Daniele and De Matteis, Tiziano and Mencagli, Gabriele},
booktitle = {12th {IEEE} International Symposium on Parallel and Distributed Processing with Applications},
title = {A High-Throughput and Low-Latency Parallelization of Window-based Stream Joins on Multicores},
year = {2014},
isbn = {978-1-4799-4293-0},
pages = {117--126},
numpages = {10},
url = {http://dx.doi.org/10.1109/ISPA.2014.24},
doi = {10.1109/ISPA.2014.24},
acmid = {2681942},
publisher = {IEEE Computer Society},
address = {Milano, Italy}
}

M. Danelutto and M. Torquati, “Loop parallelism: a new skeleton perspective on data parallel patterns,” in Proc. of intl. euromicro pdp 2014: parallel distributed and network-based processing, Torino, Italy, 2014. doi:10.1109/PDP.2014.13
[Abstract] [BibTeX] [Download PDF]

@inproceedings{ff:looppar:pdp:14,
Abstract = {Traditionally, skeleton based parallel programming frameworks support data parallelism by providing the programmer with a comprehensive set of data parallel skeletons, based on different variants of map and reduce patterns. On the other side, more conventional parallel programming frameworks provide application programmers with the possibility to introduce parallelism in the execution of loops with a relatively small programming effort. In this work, we discuss a ``ParallelFor'' skeleton provided within the FastFlow framework and aimed at filling the usability and expressivity gap between the classical data parallel skeleton approach and the loop parallelisation facilities offered by frameworks such as OpenMP and Intel TBB. By exploiting the low run-time overhead of the FastFlow parallel skeletons and the new facilities offered by the C++11 standard, our ParallelFor skeleton succeeds to obtain comparable or better performance than both OpenMP and TBB on the Intel Phi many-core and Intel Nehalem multi-core for a set of benchmarks considered, yet requiring a comparable programming effort.},
Address = {Torino, Italy},
Author = {Marco Danelutto and Massimo Torquati},
Booktitle = {Proc. of Intl. {Euromicro} {PDP} 2014: Parallel Distributed and network-based Processing},
Date-Added = {2014-02-15 16:53:29 +0000},
Date-Modified = {2015-09-27 12:42:31 +0000},
Doi = {10.1109/PDP.2014.13},
Editor = {Marco Aldinucci and Daniele D'Agostino and Peter Kilpatrick},
Keywords = {fastflow, paraphrase},
Publisher = {IEEE},
Title = {Loop parallelism: a new skeleton perspective on data parallel patterns},
pdf = {http://calvados.di.unipi.it/storage/paper_files/2014_ff_looppar_pdp.pdf},
Year = {2014},
Bdsk-Url-1 = {http://calvados.di.unipi.it/storage/paper_files/2014_ff_looppar_pdp.pdf},
Bdsk-Url-2 = {http://dx.doi.org/10.1109/PDP.2014.13}}

T. De Matteis, “Autonomic parallel data stream processing,” in High performance computing simulation (hpcs), 2014 international conference on, Bologna, Italy, 2014, pp. 995-998. doi:10.1109/HPCSim.2014.6903797
[BibTeX]

@INPROCEEDINGS{hpcs2014,
author = {De Matteis, Tiziano},
booktitle = {High Performance Computing Simulation ({HPCS}), 2014 International Conference on},
title = {Autonomic Parallel Data Stream Processing},
year = {2014},
month = jul,
pages = {995--998},
address = {Bologna, Italy},
doi = {10.1109/HPCSim.2014.6903797}
}

M. Drocco, M. Aldinucci, and M. Torquati, “A dynamic memory allocator for heterogeneous platforms,” in Advanced computer architecture and compilation for high-performance and embedded systems (acaces) — poster abstracts, Fiuggi, Italy, 2014.
[Abstract] [BibTeX] [Download PDF]

@inproceedings{ff:acaces:14,
  author = {Maurizio Drocco and Marco Aldinucci and Massimo Torquati},
  title = {A Dynamic Memory Allocator for heterogeneous platforms},
  booktitle = {Advanced Computer Architecture and Compilation for High-Performance and Embedded Systems (ACACES) -- Poster Abstracts},
  publisher = {HiPEAC},
  address = {Fiuggi, Italy},
  year = {2014},
  keywords = {fastflow},
  pdf = {http://calvados.di.unipi.it/storage/paper_files/2014_ACACES_ex-abstract.pdf},
  abstract = {Modern computers are built upon heterogeneous multi-core/many cores architectures (e.g. GPGPU connected to multi-core CPU). Achieving peak performance on these architectures is hard and may require a substantial programming effort. High-level programming patterns, coupled with efficient low-level runtime supports, have been proposed to relieve the programmer from worrying about low-level details such as synchronisation of racing processes as well as those fine tunings needed to improve the overall performance. Among them are (parallel) dynamic memory allocation and effective exploitation of the memory hierarchy. The memory allocator is often a bottleneck that severely limits program scalability, robustness and portability on parallel systems.
In this work we introduce a novel memory allocator, based on the FastFlow's allocator and the recently proposed CUDA Unified Memory, which aims to efficiently integrate host and device memories into a unique dynamic-allocable memory space, accessible transparently by both host and device code.},
  date-modified = {2015-03-21 15:14:33 +0000},
  bdsk-url-1 = {http://calvados.di.unipi.it/storage/paper_files/2013_ACACES_ex-abstract.pdf},
  bdsk-url-2 = {http://calvados.di.unipi.it/storage/paper_files/2014_ACACES_ex-abstract.pdf}
}

C. Misale, G. Ferrero, M. Torquati, and M. Aldinucci, “Sequence alignment tools: one parallel pattern to rule them all?,” Biomed research international, 2014. doi:10.1155/2014/539410
[Abstract] [BibTeX] [Download PDF]

@article{bowtie-bwa:ff:multicore:biomed:14,
  author = {Claudia Misale and Giulio Ferrero and Massimo Torquati and Marco Aldinucci},
  title = {Sequence alignment tools: one parallel pattern to rule them all?},
  journal = {BioMed Research International},
  year = {2014},
  doi = {10.1155/2014/539410},
  keywords = {fastflow,bioinformatics, paraphrase, repara},
  pdf = {http://downloads.hindawi.com/journals/bmri/2014/539410.pdf},
  abstract = {In this paper we advocate high-level programming methodology for Next Generation Sequencers (NGS) alignment tools for both productivity and absolute performance. We analyse the problem of parallel alignment and review the parallelisation strategies of the most popular alignment tools, which can all be abstracted to a single parallel paradigm. We compare these tools against their porting onto the FastFlow pattern-based programming framework, which provides programmers with high-level parallel patterns. By using a high-level approach, programmers are liberated from all complex aspects of parallel programming, such as synchronisation protocols and task scheduling, gaining more possibility for seamless performance tuning. In this work we show some use case in which, by using a high-level approach for parallelising NGS tools, it is possible to obtain comparable or even better absolute performance for all used datasets.
},
  date-added = {2013-01-15 15:55:59 +0000},
  date-modified = {2015-09-27 12:16:28 +0000},
  bdsk-url-1 = {http://downloads.hindawi.com/journals/bmri/2014/539410.pdf},
  bdsk-url-2 = {http://dx.doi.org/10.1155/2014/539410}
}

A. Secco, I. Uddin, G. Peretti Pezzi, and M. Torquati, “Message passing on infiniband RDMA for parallel run-time supports,” in Proc. of intl. euromicro pdp 2014: parallel distributed and network-based processing, Torino, Italy, 2014. doi:10.1109/PDP.2014.23
[Abstract] [BibTeX] [Download PDF]

@inproceedings{ff:infiniband:pdp:14,
Abstract = {InfiniBand networks are commonly used in the high performance computing area. They offer RDMA-based operations that help to improve the performance of communication subsystems. In this paper, we propose a minimal message-passing communication layer providing the programmer with a point-to-point communication channel implemented by way of InfiniBand RDMA features. Differently from other libraries exploiting the InfiniBand features, such as the well-known Message Passing Interface (MPI), the proposed library is a communication layer only rather than a programming model, and can be easily used as building block for high-level parallel programming frameworks. Evaluated on micro-benchmarks, the proposed RDMA-based communication channel implementation achieves a comparable performance with highly optimised MPI/InfiniBand implementations. Eventually, the flexibility of the communication layer is evaluated by integrating it within the FastFlow parallel framework, currently supporting TCP/IP networks (via the ZeroMQ communication library).},
Address = {Torino, Italy},
Author = {Alessandro Secco and Irfan Uddin and Guilherme {Peretti Pezzi} and Massimo Torquati},
Booktitle = {Proc. of Intl. Euromicro PDP 2014: Parallel Distributed and network-based Processing},
Date-Added = {2013-12-07 18:22:35 +0000},
Date-Modified = {2015-09-27 12:35:04 +0000},
Doi = {10.1109/PDP.2014.23},
Editor = {Marco Aldinucci and Daniele D'Agostino and Peter Kilpatrick},
Keywords = {fastflow, paraphrase, impact},
Publisher = {IEEE},
Title = {Message passing on {InfiniBand} {RDMA} for parallel run-time supports},
pdf = {http://calvados.di.unipi.it/storage/paper_files/2014_ff_infiniband_pdp.pdf},
Year = {2014},
Bdsk-Url-1 = {http://calvados.di.unipi.it/storage/paper_files/2014_ff_infiniband_pdp.pdf},
Bdsk-Url-2 = {http://dx.doi.org/10.1109/PDP.2014.23}}

2013

M. Aldinucci, M. Danelutto, P. Kilpatrick, C. Montangero, and L. Semini, “Managing adaptivity in parallel systems,” in Formal methods for components and objects: intl. symposium, fmco 2011, torino, italy, october 3-5, 2011, revised invited lectures, B. Beckert, F. Damiani, F. S. de Boer, and M. M. Bonsangue, Eds., Springer, 2013, vol. 7542, pp. 199-217. doi:10.1007/978-3-642-35887-6_11
[Abstract] [BibTeX] [Download PDF]

@incollection{adaptivity:fmco:11,
Abstract = {The management of non-functional features (performance, security, power management, etc.) is traditionally a difficult, error prone task for programmers of parallel applications. To take care of these non-functional features, autonomic managers running policies represented as rules using sensors and actuators to monitor and transform a running parallel application may be used. We discuss an approach aimed at providing formal tool support to the integration of independently developed autonomic managers taking care of different non-functional concerns within the same parallel application. Our approach builds on the Behavioural Skeleton experience (autonomic management of non-functional features in structured parallel applications) and on previous results on conflict detection and resolution in rule-based systems.},
Author = {Marco Aldinucci and Marco Danelutto and Peter Kilpatrick and Carlo Montangero and Laura Semini},
Booktitle = {Formal Methods for Components and Objects: Intl. Symposium, FMCO 2011, Torino, Italy, October 3-5, 2011, Revised Invited Lectures},
Date-Added = {2012-06-04 19:05:16 +0200},
Date-Modified = {2013-05-03 17:55:21 +0000},
Doi = {10.1007/978-3-642-35887-6_11},
Editor = {Bernhard Beckert and Ferruccio Damiani and Frank S. de Boer and Marcello M. Bonsangue},
Isbn = {978-3-642-35886-9},
Keywords = {multicore, distributed, paraphrase},
Pages = {199--217},
Publisher = {Springer},
Series = {LNCS},
Title = {Managing Adaptivity in Parallel Systems},
pdf = {http://calvados.di.unipi.it/storage/paper_files/2013_fmco11_adaptivity.pdf},
Volume = {7542},
Year = {2013},
Bdsk-Url-1 = {http://calvados.di.unipi.it/storage/paper_files/2013_fmco11_adaptivity.pdf},
Bdsk-Url-2 = {http://dx.doi.org/10.1007/978-3-642-35887-6_11}}

M. Aldinucci, S. Campa, P. Kilpatrick, and M. Torquati, “Structured data access annotations for massively parallel computations,” in Euro-par 2012 workshops, proc. of the paraphrase workshop on parallel processing, 2013, pp. 381-390. doi:10.1007/978-3-642-36949-0_42
[Abstract] [BibTeX] [Download PDF]

@inproceedings{annotation:para:12,
Abstract = {We describe an approach aimed at addressing the issue of joint exploitation of control (stream) and data parallelism in a skeleton based parallel programming environment, based on annotations and refactoring. Annotations drive efficient implementation of a parallel computation. Refactoring is used to transform the associated skeleton tree into a more efficient, functionally equivalent skeleton tree. In most cases, cost models are used to drive the refactoring process. We show how sample use case applications/kernels may be optimized and discuss preliminary experiments with FastFlow assessing the theoretical results.},
Author = {Marco Aldinucci and Sonia Campa and Peter Kilpatrick and Massimo Torquati},
Booktitle = {Euro-Par 2012 Workshops, Proc. of the ParaPhrase Workshop on Parallel Processing},
Date-Added = {2012-07-23 21:22:03 +0000},
Date-Modified = {2015-09-27 12:49:52 +0000},
Doi = {10.1007/978-3-642-36949-0_42},
Keywords = {fastflow, paraphrase},
Pages = {381--390},
Publisher = {Springer},
Series = {LNCS},
Title = {Structured Data Access Annotations for Massively Parallel Computations},
pdf = {http://calvados.di.unipi.it/storage/paper_files/2013_annot_europar_workshops.pdf},
Volume = {7640},
Year = {2013},
Bdsk-Url-1 = {http://calvados.di.unipi.it/storage/paper_files/2013_annot_europar_workshops.pdf},
Bdsk-Url-2 = {http://dx.doi.org/10.1007/978-3-642-36949-0_42}}

M. Aldinucci, F. Tordini, M. Drocco, M. Torquati, and M. Coppo, “Parallel stochastic simulators in system biology: the evolution of the species,” in Proc. of intl. euromicro pdp 2013: parallel distributed and network-based processing, Belfast, Northern Ireland, U.K., 2013. doi:10.1109/PDP.2013.66
[Abstract] [BibTeX] [Download PDF]

@inproceedings{ff_cwc_distr:pdp:13,
Abstract = {The stochastic simulation of biological systems is an increasingly popular technique in Bioinformatics. It is often an enlightening technique, especially for multi-stable systems which dynamics can be hardly captured with ordinary differential equations. To be effective, stochastic simulations should be supported by powerful statistical analysis tools. The simulation-analysis workflow may however result in being computationally expensive, thus compromising the interactivity required in model tuning. In this work we advocate the high-level design of simulators for stochastic systems as a vehicle for building efficient and portable parallel simulators. In particular, the Calculus of Wrapped Components (CWC) simulator, which is designed according to the FastFlow's pattern-based approach, is presented and discussed in this work. FastFlow has been extended to support also clusters of multi-cores with minimal coding effort, assessing the portability of the approach.},
Address = {Belfast, Northern Ireland, U.K.},
Author = {Marco Aldinucci and Fabio Tordini and Maurizio Drocco and Massimo Torquati and Mario Coppo},
Booktitle = {Proc. of Intl. Euromicro PDP 2013: Parallel Distributed and network-based Processing},
Date-Added = {2012-01-20 19:22:15 +0100},
Date-Modified = {2013-11-24 00:30:43 +0000},
Doi = {10.1109/PDP.2013.66},
Keywords = {fastflow, bioinformatics},
Month = feb,
Publisher = {IEEE},
Title = {Parallel stochastic simulators in system biology: the evolution of the species},
pdf = {http://calvados.di.unipi.it/storage/paper_files/2013_cwc_d_PDP.pdf},
Year = {2013},
Bdsk-Url-1 = {http://calvados.di.unipi.it/storage/paper_files/2013_cwc_d_PDP.pdf},
Bdsk-Url-2 = {http://dx.doi.org/10.1109/PDP.2013.66}}

M. Aldinucci, S. Campa, M. Danelutto, P. Kilpatrick, and M. Torquati, “Targeting distributed systems in fastflow,” in Euro-par 2012 workshops, proc. of the coregrid workshop on grids, clouds and p2p computing, 2013, pp. 47-56. doi:10.1007/978-3-642-36949-0_7
[Abstract] [BibTeX] [Download PDF]

@inproceedings{ff:distr:cgs:12,
Abstract = {FastFlow is a structured parallel programming framework targeting shared memory multi-core architectures. In this paper we introduce a FastFlow extension aimed at supporting a network of multi-core workstation as well. The extension supports the execution of FastFlow programs by coordinating -- in a structured way -- the fine grain parallel activities running on a single workstation. We discuss the design and the implementation of this extension presenting preliminary experimental results validating it on state-of-the-art networked multi-core nodes.},
Author = {Marco Aldinucci and Sonia Campa and Marco Danelutto and Peter Kilpatrick and Massimo Torquati},
Booktitle = {Euro-Par 2012 Workshops, Proc. of the CoreGrid Workshop on Grids, Clouds and P2P Computing},
Date-Added = {2012-07-23 21:22:03 +0000},
Date-Modified = {2015-09-27 12:47:54 +0000},
Doi = {10.1007/978-3-642-36949-0_7},
Keywords = {fastflow, paraphrase},
Pages = {47--56},
Publisher = {Springer},
Series = {LNCS},
Title = {Targeting Distributed Systems in {FastFlow}},
pdf = {http://calvados.di.unipi.it/storage/paper_files/2012_distr_ff_cgsymph.pdf},
Volume = {7640},
Year = {2013},
Bdsk-Url-1 = {http://calvados.di.unipi.it/storage/paper_files/2012_distr_ff_cgsymph.pdf},
Bdsk-Url-2 = {http://dx.doi.org/10.1007/978-3-642-36949-0_7}}

M. Aldinucci, S. Campa, F. Tordini, M. Torquati, and P. Kilpatrick, “An abstract annotation model for skeletons,” in Formal methods for components and objects: intl. symposium, fmco 2011, torino, italy, october 3-5, 2011, revised invited lectures, B. Beckert, F. Damiani, F. S. de Boer, and M. M. Bonsangue, Eds., Springer, 2013, vol. 7542, pp. 257-276. doi:10.1007/978-3-642-35887-6_14
[Abstract] [BibTeX] [Download PDF]

@incollection{toolchain:fmco:11,
Abstract = {Multi-core and many-core platforms are becoming increasingly heterogeneous and asymmetric. This significantly increases the porting and tuning effort required for parallel codes, which in turn often leads to a growing gap between peak machine power and actual application performance. In this work a first step toward the automated optimization of high level skeleton-based parallel code is discussed. The paper presents an abstract annotation model for skeleton programs aimed at formally describing suitable mapping of parallel activities on a high-level platform representation. The derived mapping and scheduling strategies are used to generate optimized run-time code.},
Author = {Marco Aldinucci and Sonia Campa and Fabio Tordini and Massimo Torquati and Peter Kilpatrick},
Booktitle = {Formal Methods for Components and Objects: Intl. Symposium, FMCO 2011, Torino, Italy, October 3-5, 2011, Revised Invited Lectures},
Date-Added = {2012-06-04 19:23:25 +0200},
Date-Modified = {2013-11-24 00:33:41 +0000},
Doi = {10.1007/978-3-642-35887-6_14},
Editor = {Bernhard Beckert and Ferruccio Damiani and Frank S. de Boer and Marcello M. Bonsangue},
Isbn = {978-3-642-35886-9},
Keywords = {fastflow, paraphrase},
Pages = {257--276},
Publisher = {Springer},
Series = {LNCS},
Title = {An abstract annotation model for skeletons},
pdf = {http://calvados.di.unipi.it/storage/paper_files/2013_fmco11_annotation.pdf},
Volume = {7542},
Year = {2013},
Bdsk-Url-1 = {http://calvados.di.unipi.it/storage/paper_files/2013_fmco11_annotation.pdf},
Bdsk-Url-2 = {http://dx.doi.org/10.1007/978-3-642-35887-6_14}}

D. Buono, M. Danelutto, S. Lametti, and M. Torquati, “Parallel patterns for general purpose many-core,” in Proc. of intl. euromicro pdp 2013: parallel distributed and network-based processing, Belfast, Northern Ireland, U.K., 2013. doi:10.1109/PDP.2013.27
[Abstract] [BibTeX]

@inproceedings{ff_tilera:pdp:13,
Abstract = {Efficient programming of general purpose many-core accelerators poses several challenging problems. The high number of cores available, the peculiarity of the interconnection network, and the complex memory hierarchy organization, all contribute to make efficient programming of such devices difficult. We propose to use parallel design patterns, implemented using algorithmic skeletons, to abstract and hide most of the difficulties related to the efficient programming of many-core accelerators. In particular, we discuss the porting of the FastFlow framework on the Tilera TilePro64 architecture and the results obtained running synthetic benchmarks as well as true application kernels. These results demonstrate the efficiency achieved while using patterns on the TilePro64 both to program stand-alone skeleton-based parallel applications and to accelerate existing sequential code.},
Address = {Belfast, Northern Ireland, U.K.},
Author = {Daniele Buono and Marco Danelutto and Silvia Lametti and Massimo Torquati},
Booktitle = {Proc. of Intl. Euromicro PDP 2013: Parallel Distributed and network-based Processing},
Date-Modified = {2013-11-24 00:31:22 +0000},
Doi = {10.1109/PDP.2013.27},
Keywords = {fastflow},
Month = feb,
Publisher = {IEEE},
Title = {Parallel Patterns for General Purpose Many-Core},
Year = {2013},
Bdsk-Url-1 = {http://dx.doi.org/10.1109/PDP.2013.27}}

M. Danelutto, L. Deri, D. De Sensi, and M. Torquati, “Deep packet inspection on commodity hardware using fastflow,” in Proceedings of 15th international parallel computing conference (ParCo), Munich, Germany, 2013, pp. 92-99. doi:10.3233/978-1-61499-381-0-92
[Abstract] [BibTeX] [URL] [Download PDF]

@InProceedings{ff:DPI:14,
author = {Danelutto, Marco and Deri, Luca and De Sensi, Daniele and Torquati, Massimo},
title = {Deep Packet Inspection on Commodity Hardware using {FastFlow}},
booktitle = {Proceedings of 15th International Parallel Computing Conference ({ParCo})},
year = {2013},
editor = {Michael Bader and Arndt Bode and Hans-Joachim Bungartz and Michael Gerndt and Gerhard R. Joubert and Frans Peters},
volume = {25},
series = {Advances in Parallel Computing},
pages = {92--99},
address = {Munich, Germany},
publisher = {IOS Press},
doi = {10.3233/978-1-61499-381-0-92},
keywords = {fastflow, dpi, network monitoring},
pdf = {http://pages.di.unipi.it/desensi/assets/pdf/2013_ParCo.pdf},
url = {http://ebooks.iospress.nl/publication/35869},
abstract = {The analysis of packet payload is mandatory for network security and traffic monitoring applications. The computational cost of this activity pushed the industry towards hardware-assisted deep packet inspection (DPI) that have the disadvantage of being more expensive and less flexible. This paper covers the design and implementation of a new DPI framework using FastFlow, a skeleton-based parallel programming library targeting efficient streaming on multi-core architectures. The experimental results demonstrate the efficiency of the DPI framework proposed, proving the feasibility to perform 10Gbit DPI analysis using modern commodity hardware.},
}

M. Danelutto and M. Torquati, “A RISC building block set for structured parallel programming,” in Proc. of intl. euromicro pdp 2013: parallel distributed and network-based processing, Belfast, Northern Ireland, U.K., 2013. doi:10.1109/PDP.2013.17
[Abstract] [BibTeX]

@inproceedings{RISCbb:pdp:13,
Abstract = {We propose a set of building blocks (RISC-pb2l) suitable to build high-level structured parallel programming frameworks. The set is designed following a RISC approach. RISC-pb2l is architecture independent but the implementation of the different blocks may be specialized to make the best usage of the target architecture peculiarities. A number of optimizations may be designed transforming basic building blocks compositions into more efficient compositions, such that parallel application efficiency may be derived by construction rather than by debugging.},
Address = {Belfast, Northern Ireland, U.K.},
Author = {Marco Danelutto and Massimo Torquati},
Booktitle = {Proc. of Intl. Euromicro PDP 2013: Parallel Distributed and network-based Processing},
Date-Modified = {2015-09-27 12:46:20 +0000},
Doi = {10.1109/PDP.2013.17},
Keywords = {fastflow},
Month = feb,
Publisher = {IEEE},
Title = {A {RISC} building block set for structured parallel programming},
Year = {2013},
Bdsk-Url-1 = {http://dx.doi.org/10.1109/PDP.2013.17}}

T. De Matteis, F. Luporini, G. Mencagli, and M. Vanneschi, “Evaluation of architectural supports for fine-grained synchronization mechanisms,” in Proceedings of the 11th iasted international conference on parallel and distributed computing and networks, Innsbruck, Austria, 2013.
[Abstract] [BibTeX]

@InProceedings{pdcn2013,
  title = {Evaluation of Architectural Supports for Fine-Grained Synchronization Mechanisms},
  author = {De Matteis, Tiziano and Luporini, Fabio and Mencagli, Gabriele and Vanneschi, Marco},
  booktitle = {Proceedings of the 11th IASTED International Conference on Parallel and Distributed Computing and Networks},
  publisher = {Iasted},
  address = {Innsbruck, Austria},
  year = {2013},
  isbn = {978-088986943-1},
  abstract = {The advent of multi-/many-core architectures demands efficient run-time supports to sustain parallel applications scalability. Synchronization mechanisms should be optimized in order to account for different scenarios, such as the interaction between threads executed on different cores as well as intra-core synchronization, i.e. involving threads executed on hardware contexts of the same core. In this perspective, we describe the design issues of two notable mechanisms for shared-memory parallel computations. We point out how specific architectural supports, like hardware cache coherence and core-to-core interconnection networks, make it possible to design optimized implementations of such mechanisms. In this paper we discuss experimental results on three representative architectures: a flagship Intel multi-core and two interesting network processors. The final result helps to untangle the complex implementation space of synchronization mechanisms.}
}

K. Hammond, M. Aldinucci, C. Brown, F. Cesarini, M. Danelutto, H. González-Vélez, P. Kilpatrick, R. Keller, M. Rossbory, and G. Shainer, “The paraphrase project: parallel patterns for adaptive heterogeneous multicore systems,” in Formal methods for components and objects: intl. symposium, fmco 2011, torino, italy, october 3-5, 2011, revised invited lectures, B. Beckert, F. Damiani, F. S. de Boer, and M. M. Bonsangue, Eds., Springer, 2013, vol. 7542, pp. 218-236. doi:10.1007/978-3-642-35887-6_12
[Abstract] [BibTeX] [Download PDF]

@incollection{paraphrase:fmco:11,
Abstract = {This paper describes the ParaPhrase project, a new 3-year targeted research project funded under EU Framework 7 Objective 3.4 (Computer Systems), starting in October 2011. ParaPhrase aims to follow a new approach to introducing parallelism using advanced refactoring techniques coupled with high-level parallel design patterns. The refactoring approach will use these design patterns to restructure programs defined as networks of software components into other forms that are more suited to parallel execution. The programmer will be aided by high-level cost information that will be integrated into the refactoring tools. The implementation of these patterns will then use a well-understood algorithmic skeleton approach to achieve good parallelism. A key ParaPhrase design goal is that parallel components are intended to match heterogeneous architectures, defined in terms of CPU/GPU combinations, for example. In order to achieve this, the ParaPhrase approach will map components at link time to the available hardware, and will then re-map them during program execution, taking account of multiple applications, changes in hardware resource availability, the desire to reduce communication costs etc. In this way, we aim to develop a new approach to programming that will be able to produce software that can adapt to dynamic changes in the system environment. Moreover, by using a strong component basis for parallelism, we can achieve potentially significant gains in terms of reducing sharing at a high level of abstraction, and so in reducing or even eliminating the costs that are usually associated with cache management, locking, and synchronisation.},
Author = {Kevin Hammond and Marco Aldinucci and Chris Brown and Francesco Cesarini and Marco Danelutto and Horacio Gonz{\'a}lez-V{\'e}lez and Peter Kilpatrick and Rainer Keller and Michael Rossbory and Gilad Shainer},
Booktitle = {Formal Methods for Components and Objects: Intl. Symposium, FMCO 2011, Torino, Italy, October 3-5, 2011, Revised Invited Lectures},
Date-Added = {2012-06-04 19:21:18 +0200},
Date-Modified = {2013-11-24 00:33:27 +0000},
Doi = {10.1007/978-3-642-35887-6_12},
Editor = {Bernhard Beckert and Ferruccio Damiani and Frank S. de Boer and Marcello M. Bonsangue},
Isbn = {978-3-642-35886-9},
Keywords = {paraphrase},
Pages = {218--236},
Publisher = {Springer},
Series = {LNCS},
Title = {The ParaPhrase Project: Parallel Patterns for Adaptive Heterogeneous Multicore Systems},
pdf = {http://calvados.di.unipi.it/storage/paper_files/2013_fmco11_paraphrase.pdf},
Volume = {7542},
Year = {2013},
Bdsk-Url-1 = {http://dx.doi.org/10.1007/978-3-642-35887-6_12},
Bdsk-Url-2 = {http://calvados.di.unipi.it/storage/paper_files/2013_fmco11_paraphrase.pdf}}

C. Misale, M. Aldinucci, and M. Torquati, “Memory affinity in multi-threading: the bowtie2 case study,” in Advanced computer architecture and compilation for high-performance and embedded systems (acaces) — poster abstracts, Fiuggi, Italy, 2013.
[Abstract] [BibTeX] [Download PDF]

@inproceedings{ff:acaces:13,
  author = {Claudia Misale and Marco Aldinucci and Massimo Torquati},
  title = {Memory affinity in multi-threading: the Bowtie2 case study},
  booktitle = {Advanced Computer Architecture and Compilation for High-Performance and Embedded Systems (ACACES) -- Poster Abstracts},
  publisher = {HiPEAC},
  address = {Fiuggi, Italy},
  year = {2013},
  isbn = {9789038221908},
  keywords = {fastflow},
  pdf = {http://calvados.di.unipi.it/storage/paper_files/2013_ACACES_ex-abstract.pdf},
  abstract = {The diffusion of the Next Generation Sequencing (NGS) has increased
the amount of data obtainable by genomic experiments. From a DNA sample a NGS run is able to produce millions of short sequences (called reads), which should be mapped into a reference genome. In this paper, we analyse the performance of Bowtie2, a fast and popular DNA mapping tool. Bowtie2 exhibits a multithreading implementation on top of pthreads, spin-locks and SSE2 SIMD extension.
From parallel computing viewpoint, is a paradigmatic example of a software requiring to address three
fundamental problems in shared-memory programming for cache-coherent multi-core platforms: synchronisation efficiency at very fine grain (due to short reads), load-balancing (due to long reads), and efficient usage of memory subsystem (due to SSE2 memory pressure).
We compare the original implementation against an alternative implementation on top of the
FastFlow pattern-based programming framework. The proposed design exploits the high-level farm pattern of FastFlow, which is implemented top of nonblocking multi-threading and lock-less (CAS-free) queues, and provides the programmer with high-level mechanism to tune task scheduling to achieve both load-balancing and memory affinity. The proposed design, despite the high-level design, is always faster and more scalable with respect to the original one.
The design of both original and alternative version will be presented along with their experimental evaluation on real-world data sets.},
  date-added = {2015-03-21 15:12:59 +0000},
  date-modified = {2015-03-21 15:12:59 +0000},
  bdsk-url-1 = {http://calvados.di.unipi.it/storage/paper_files/2013_ACACES_ex-abstract.pdf}
}

2012

M. Aldinucci, M. Coppo, F. Damiani, M. Drocco, E. Sciacca, S. Spinella, M. Torquati, and A. Troina, “On parallelizing on-line statistics for stochastic biological simulations,” in Euro-par 2011 workshops, proc. of the 2nd workshop on high performance bioinformatics and biomedicine (hibb), Bordeaux, France, 2012, pp. 3-12. doi:10.1007/978-3-642-29740-3_2
[Abstract] [BibTeX] [Download PDF]

@inproceedings{cwcsim:onlinestats:ff:hibb:11,
Abstract = {This work concerns a general technique to enrich parallel version of stochastic simulators for biological systems with tools for on-line statistical analysis of the results. In particular, within the FastFlow parallel programming framework, we describe the methodology and the implementation of a parallel Monte Carlo simulation infrastructure extended with user-defined on-line data filtering and mining functions. The simulator and the on-line analysis were validated on large multi-core platforms and representative proof-of-concept biological systems.},
Address = {Bordeaux, France},
Author = {Marco Aldinucci and Mario Coppo and Ferruccio Damiani and Maurizio Drocco and Eva Sciacca and Salvatore Spinella and Massimo Torquati and Angelo Troina},
Booktitle = {Euro-Par 2011 Workshops, Proc. of the 2nd Workshop on High Performance Bioinformatics and Biomedicine (HiBB)},
Date-Added = {2010-08-15 00:50:09 +0200},
Date-Modified = {2013-11-24 00:35:51 +0000},
Doi = {10.1007/978-3-642-29740-3_2},
Editor = {Michael Alexander and Pasqua D'Ambra and Adam Belloum and George Bosilca and Mario Cannataro and Marco Danelutto and Beniamino Di Martino and Michael Gerndt and Emmanuel Jeannot and Raymond Namyst and Jean Roman and Stephen L. Scott and Jesper Larsson Tr{\"a}ff and Geoffroy Vall{\'e}e and Josef Weidendorfer},
Keywords = {bioinformatics, fastflow},
Pages = {3--12},
Publisher = {Springer},
Series = {LNCS},
Title = {On Parallelizing On-Line Statistics for Stochastic Biological Simulations},
pdf = {http://calvados.di.unipi.it/storage/paper_files/2012_onlinestat_HiBB2011.pdf},
Volume = {7156},
Year = {2012},
Bdsk-Url-1 = {http://calvados.di.unipi.it/storage/paper_files/2012_onlinestat_HiBB2011.pdf},
Bdsk-Url-2 = {http://dx.doi.org/10.1007/978-3-642-29740-3_2}}

M. Aldinucci, M. Danelutto, L. Anardu, M. Torquati, and P. Kilpatrick, “Parallel patterns + macro data flow for multi-core programming,” in Proc. of intl. euromicro pdp 2012: parallel distributed and network-based processing, Garching, Germany, 2012, pp. 27-36. doi:10.1109/PDP.2012.44
[Abstract] [BibTeX] [Download PDF]

@inproceedings{dataflow:pdp:12,
Abstract = {Data flow techniques have been around since the early '70s when they were used in compilers for sequential languages. Shortly after their introduction they were also considered as a possible model for parallel computing, although the impact here was limited. Recently, however, data flow has been identified as a candidate for efficient implementation of various programming models on multi-core architectures. In most cases, however, the burden of determining data flow ``macro'' instructions is left to the programmer, while the compiler/run time system manages only the efficient scheduling of these instructions. We discuss a structured parallel programming approach supporting automatic compilation of programs to macro data flow and we show experimental results demonstrating the feasibility of the approach and the efficiency of the resulting ``object'' code on different classes of state-of-the-art multi-core architectures. The experimental results use different base mechanisms to implement the
macro data flow run time support, from plain pthreads with condition variables to more modern and effective lock- and fence-free parallel frameworks. Experimental results comparing efficiency of the proposed approach with those achieved using other, more classical, parallel frameworks are also presented.},
Address = {Garching, Germany},
Author = {Marco Aldinucci and Marco Danelutto and Lorenzo Anardu and Massimo Torquati and Peter Kilpatrick},
Booktitle = {Proc. of Intl. Euromicro PDP 2012: Parallel Distributed and network-based Processing},
Date-Added = {2012-10-24 17:29:14 +0000},
Date-Modified = {2013-11-24 00:35:34 +0000},
Doi = {10.1109/PDP.2012.44},
Keywords = {fastflow},
Month = feb,
Pages = {27--36},
Publisher = {IEEE},
Title = {Parallel patterns + Macro Data Flow for multi-core programming},
pdf = {http://calvados.di.unipi.it/storage/paper_files/2012_mdf_PDP.pdf},
Year = {2012},
Bdsk-Url-1 = {http://calvados.di.unipi.it/storage/paper_files/2012_mdf_PDP.pdf},
Bdsk-Url-2 = {http://dx.doi.org/10.1109/PDP.2012.44}}

M. Aldinucci, C. Spampinato, M. Drocco, M. Torquati, and S. Palazzo, “A parallel edge preserving algorithm for salt and pepper image denoising,” in Proc. of 2nd intl. conference on image processing theory tools and applications (ipta), Istanbul, Turkey, 2012, pp. 97-102. doi:10.1109/IPTA.2012.6469567
[Abstract] [BibTeX] [Download PDF]

@inproceedings{denoiser:ff:ipta:12,
Abstract = {In this paper a two-phase filter for removing ``salt and pepper'' noise is proposed. In the first phase, an adaptive median filter is used to identify the set of the noisy pixels; in the second phase, these pixels are restored according to a regularization method, which contains a data-fidelity term reflecting the impulse noise characteristics. The algorithm, which exhibits good performance both in denoising and in restoration, can be easily and effectively parallelized to exploit the full power of multi-core CPUs and GPGPUs; the proposed implementation based on the FastFlow library achieves both close-to-ideal speedup and very good wall-clock execution figures.},
Address = {Istanbul, Turkey},
Author = {Marco Aldinucci and Concetto Spampinato and Maurizio Drocco and Massimo Torquati and Simone Palazzo},
Booktitle = {Proc. of 2nd Intl. Conference on Image Processing Theory Tools and Applications (IPTA)},
Date-Added = {2012-06-04 18:38:01 +0200},
Date-Modified = {2015-09-27 12:53:53 +0000},
Doi = {10.1109/IPTA.2012.6469567},
Editor = {K. Djemal and M. Deriche and W. Puech and Osman N. Ucan},
Isbn = {978-1-4673-2582-0},
Keywords = {fastflow, impact},
Month = oct,
Pages = {97--102},
Publisher = {IEEE},
Title = {A Parallel Edge Preserving Algorithm for Salt and Pepper Image Denoising},
pdf = {http://calvados.di.unipi.it/storage/paper_files/2012_2phasedenoiser_ff_ipta.pdf},
Year = {2012},
Bdsk-Url-1 = {http://calvados.di.unipi.it/storage/paper_files/2012_2phasedenoiser_ff_ipta.pdf},
Bdsk-Url-2 = {http://dx.doi.org/10.1109/IPTA.2012.6469567}}

M. Aldinucci, M. Danelutto, and M. Torquati, “FastFlow tutorial,” Università di Pisa, Dipartimento di Informatica, Italy, TR-12-04, 2012.
[BibTeX] [Download PDF]

@techreport{fastflow_tutorial:TR-12-04:12,
Author = {Marco Aldinucci and Marco Danelutto and Massimo Torquati},
Date-Added = {2011-03-17 23:19:05 +0100},
Date-Modified = {2013-11-24 00:34:55 +0000},
Institution = {Universit{\`a} di Pisa, Dipartimento di Informatica, Italy},
Keywords = {fastflow},
Month = mar,
Number = {TR-12-04},
Title = {{FastFlow} tutorial},
pdf = {http://compass2.di.unipi.it/TR/Files/TR-12-04.pdf.gz},
Year = {2012},
Bdsk-Url-1 = {http://compass2.di.unipi.it/TR/Files/TR-12-04.pdf.gz}}

M. Aldinucci, M. Danelutto, P. Kilpatrick, M. Meneghin, and M. Torquati, “An efficient unbounded lock-free queue for multi-core systems,” in Proc. of 18th intl. euro-par 2012 parallel processing, Rhodes Island, Greece, 2012, pp. 662-673. doi:10.1007/978-3-642-32820-6_65
[Abstract] [BibTeX] [Download PDF]

@inproceedings{ff:spsc:europar:12,
Abstract = {The use of efficient synchronization mechanisms is crucial for implementing fine grained parallel programs on modern shared cache multi-core architectures. In this paper we study this problem by considering Single-Producer/Single-Consumer (SPSC) coordination using unbounded queues. A novel unbounded SPSC algorithm capable of reducing the row synchronization latency and speeding up Producer-Consumer coordination is presented. The algorithm has been extensively tested on a shared-cache multi-core platform and a sketch proof of correctness is presented. The queues proposed have been used as basic building blocks to implement the FastFlow parallel framework, which has been demonstrated to offer very good performance for fine-grain parallel applications.},
Address = {Rhodes Island, Greece},
Author = {Marco Aldinucci and Marco Danelutto and Peter Kilpatrick and Massimiliano Meneghin and Massimo Torquati},
Booktitle = {Proc. of 18th Intl. Euro-Par 2012 Parallel Processing},
Date-Added = {2011-04-19 10:22:00 +0200},
Date-Modified = {2015-09-27 12:55:20 +0000},
Doi = {10.1007/978-3-642-32820-6_65},
Keywords = {fastflow, paraphrase},
Month = aug,
Pages = {662--673},
Publisher = {Springer},
Series = {LNCS},
Title = {An Efficient Unbounded Lock-Free Queue for Multi-core Systems},
pdf = {http://calvados.di.unipi.it/storage/paper_files/2012_spsc_europar.pdf},
Volume = {7484},
Year = {2012},
Bdsk-Url-1 = {http://calvados.di.unipi.it/storage/paper_files/2012_spsc_europar.pdf},
Bdsk-Url-2 = {http://dx.doi.org/10.1007/978-3-642-32820-6_65}}

M. Aldinucci, M. Danelutto, P. Kilpatrick, and M. Torquati, “Targeting heterogeneous architectures via macro data flow,” Parallel processing letters, vol. 22, iss. 2, 2012. doi:10.1142/S0129626412400063
[Abstract] [BibTeX] [Download PDF]

@article{mdf:hplgpu:ppl:12,
  author        = {Marco Aldinucci and Marco Danelutto and Peter Kilpatrick and Massimo Torquati},
  title         = {Targeting heterogeneous architectures via macro data flow},
  journal       = {Parallel Processing Letters},
  volume        = {22},
  number        = {2},
  month         = jun,
  year          = {2012},
  issn          = {0129-6264},
  doi           = {10.1142/S0129626412400063},
  pdf           = {http://calvados.di.unipi.it/storage/paper_files/2012_mdf_PPL-hplgpu.pdf},
  keywords      = {fastflow, paraphrase},
  annote        = {Extended version of Intl. Workshop on High-level Programming for Heterogeneous and Hierarchical Parallel Systems (HLPGPU)},
  abstract      = {We propose a data flow based run time system as an efficient tool for supporting execution of parallel code on heterogeneous architectures hosting both multicore CPUs and GPUs. We discuss how the proposed run time system may be the target of both structured parallel applications developed using algorithmic skeletons/parallel design patterns and also more ``domain specific'' programming models. Experimental results demonstrating the feasibility of the approach are presented.},
  date-added    = {2012-04-25 13:20:40 +0000},
  date-modified = {2015-09-27 12:55:11 +0000},
  bdsk-url-1    = {http://calvados.di.unipi.it/storage/paper_files/2012_mdf_PPL-hplgpu.pdf},
  bdsk-url-2    = {http://dx.doi.org/10.1142/S0129626412400063}
}

C. Brown, K. Hammond, M. Danelutto, and P. Kilpatrick, “A language-independent parallel refactoring framework,” in Proceedings of the fifth workshop on refactoring tools, 2012, pp. 54-58. doi:10.1145/2328876.2328884
[BibTeX] [URL]

@inproceedings{paraphrase-wrt-2012,
author = {Brown, Christopher and Hammond, Kevin and Danelutto, Marco and Kilpatrick, Peter},
title = {A Language-independent Parallel Refactoring Framework},
booktitle = {Proceedings of the Fifth Workshop on Refactoring Tools},
series = {WRT '12},
year = {2012},
isbn = {978-1-4503-1500-5},
address = {Rapperswil, Switzerland},
pages = {54--58},
url = {http://doi.acm.org/10.1145/2328876.2328884},
doi = {10.1145/2328876.2328884},
publisher = {ACM},
keywords = {C/C++, Erlang, ParaPhrase, concurrency, parallelism, patterns, refactoring, skeletons},
}

F. Tordini, M. Aldinucci, and M. Torquati, “High-level lock-less programming for multicore,” in Advanced computer architecture and compilation for high-performance and embedded systems (acaces) — poster abstracts, Fiuggi, Italy, 2012.
[Abstract] [BibTeX] [Download PDF]

@inproceedings{ff:acaces:12,
  author        = {Fabio Tordini and Marco Aldinucci and Massimo Torquati},
  title         = {High-level lock-less programming for multicore},
  booktitle     = {Advanced Computer Architecture and Compilation for High-Performance and Embedded Systems (ACACES) -- Poster Abstracts},
  address       = {Fiuggi, Italy},
  publisher     = {HiPEAC},
  year          = {2012},
  isbn          = {9789038219875},
  pdf           = {http://calvados.di.unipi.it/storage/paper_files/2012_ACACES_ex-abstract.pdf},
  keywords      = {fastflow},
  abstract      = {Modern computers are built upon multi-core architectures. Achieving peak performance on these architectures is hard and may require a substantial programming effort. The synchronisation of many processes racing to access a common resource (the shared memory) has been a fundamental problem on parallel computing for years, and many solutions have been proposed to address this issue. Non-blocking synchronisation and transactional primitives have been envisioned as a way to reduce memory wall problem. Despite sometimes effective (and exhibiting a great momentum in the research community), they are only one facet of the problem, as their exploitation still requires non-trivial programming skills.
With non-blocking philosophy in mind, we propose high-level programming patterns that will relieve the programmer from worrying about low-level details such as synchronisation of racing processes as well as those fine tunings needed to improve the overall performance, like proper (distributed) dynamic memory allocation and effective exploitation of the memory hierarchy.},
  date-added    = {2012-07-17 17:58:06 +0200},
  date-modified = {2013-11-24 00:36:10 +0000},
  bdsk-url-1    = {http://calvados.di.unipi.it/storage/paper_files/2012_ACACES_ex-abstract.pdf}
}

M. Torquati, M. Vanneschi, M. Amini, S. Guelton, R. Keryell, V. Lanore, F.-X. Pasquier, M. Barreteau, R. Barrere, T. Petrisor, E. Lenormand, C. Cantini, and F. De Stefani, “An innovative compilation tool-chain for embedded multi-core architectures,” in Embedded world conference, Nuremberg, Germany, 2012.
[BibTeX]

@inproceedings{artemis:toolchain:12,
Address = {Nuremberg, Germany},
Author = {Massimo Torquati and Marco Vanneschi and M. Amini and S. Guelton and R. Keryell and V. Lanore and F.-X. Pasquier and M. Barreteau and R. Barrere and T. Petrisor and E. Lenormand and C. Cantini and De Stefani, F.},
Booktitle = {Embedded World Conference},
Date-Added = {2012-04-04 12:18:38 +0000},
Date-Modified = {2012-04-04 12:21:40 +0000},
Month = feb,
Title = {An innovative compilation tool-chain for embedded multi-core architectures},
Year = {2012}}

T. Weigold, M. Aldinucci, M. Danelutto, and V. Getov, “Process-driven biometric identification by means of autonomic grid components,” Int. j. of autonomous and adaptive communications systems, vol. 5, iss. 3, pp. 274-291, 2012. doi:10.1504/IJAACS.2012.047659
[Abstract] [BibTeX] [Download PDF]

@article{ibm:ijaacs:12,
Abstract = {Today's business applications are increasingly process driven, meaning that the main application logic is executed by a dedicate process engine. In addition, component-oriented software development has been attracting attention for building complex distributed applications. In this paper we present the experiences gained from building a process-driven biometric identification application that makes use of Grid infrastructures via the Grid Component Model (GCM). GCM, besides guaranteeing access to Grid resources, supports autonomic management of notable parallel composite components. This feature is exploited within our biometric identification application to ensure real time identification of fingerprints. Therefore, we briefly introduce the GCM framework and the process engine used, and we describe the implementation of the application by means of autonomic GCM components. Finally, we summarize the results, experiences, and lessons learned focusing on the integration of autonomic GCM components
and the process-driven approach.},
Author = {Thomas Weigold and Marco Aldinucci and Marco Danelutto and Vladimir Getov},
Date-Added = {2009-08-01 21:01:36 +0200},
Date-Modified = {2013-06-17 14:14:36 +0000},
Doi = {10.1504/IJAACS.2012.047659},
Issn = {1754-8632},
Journal = {Int. J. of Autonomous and Adaptive Communications Systems},
Number = {3},
Pages = {274--291},
Publisher = {Inderscience Enterprises Ltd.},
Title = {Process-Driven Biometric Identification by means of Autonomic Grid Components},
pdf = {http://calvados.di.unipi.it/storage/paper_files/2012_JAACS_Weigold.pdf},
Volume = {5},
Year = {2012},
Bdsk-Url-1 = {http://www.inderscience.com/info/inarticletoc.php?jcode=ijaacs&year=2012&vol=5&issue=3},
Bdsk-Url-2 = {http://calvados.di.unipi.it/storage/paper_files/2012_JAACS_Weigold.pdf},
Bdsk-Url-3 = {http://dx.doi.org/10.1504/IJAACS.2012.047659}}

2011

M. Aldinucci, M. Danelutto, P. Kilpatrick, M. Meneghin, and M. Torquati, “Accelerating code on multi-cores with FastFlow,” in Proc. of 17th intl. euro-par 2011 parallel processing, Bordeaux, France, 2011, pp. 170-181. doi:10.1007/978-3-642-23397-5_17
[Abstract] [BibTeX] [Download PDF]

@inproceedings{ff:acc:europar:11,
Abstract = {FastFlow is a programming framework specifically targeting cache-coherent shared-memory multicores. It is implemented as a stack of C++ template libraries built on top of lock-free (and memory fence free) synchronization mechanisms. Its philosophy is to combine programmability with performance. In this paper a new FastFlow programming methodology aimed at supporting parallelization of existing sequential code via offloading onto a dynamically created software accelerator is presented. The new methodology has been validated using a set of simple micro-benchmarks and some real applications.},
Address = {Bordeaux, France},
Author = {Marco Aldinucci and Marco Danelutto and Peter Kilpatrick and Massimiliano Meneghin and Massimo Torquati},
Booktitle = {Proc. of 17th Intl. Euro-Par 2011 Parallel Processing},
Date-Added = {2012-06-04 18:35:57 +0200},
Date-Modified = {2013-12-12 00:46:59 +0000},
Doi = {10.1007/978-3-642-23397-5_17},
Editor = {E. Jeannot and R. Namyst and J. Roman},
Keywords = {fastflow},
Month = aug,
Pages = {170--181},
Publisher = {Springer},
Series = {LNCS},
Title = {Accelerating code on multi-cores with {FastFlow}},
pdf = {http://calvados.di.unipi.it/storage/paper_files/2011_fastflow_acc_europar.pdf},
Volume = {6853},
Year = {2011},
Bdsk-Url-1 = {http://calvados.di.unipi.it/storage/paper_files/2011_fastflow_acc_europar.pdf},
Bdsk-Url-2 = {http://dx.doi.org/10.1007/978-3-642-23397-5_17}}

M. Aldinucci, M. Coppo, F. Damiani, M. Drocco, M. Torquati, and A. Troina, “On designing multicore-aware simulators for biological systems,” in Proc. of intl. euromicro pdp 2011: parallel distributed and network-based processing, Ayia Napa, Cyprus, 2011, pp. 318-325. doi:10.1109/PDP.2011.81
[Abstract] [BibTeX] [Download PDF]

@inproceedings{ff:cwc:pdp:11,
Abstract = {The stochastic simulation of biological systems is an increasingly popular technique in bioinformatics. It often is an enlightening technique, which may however result in being computational expensive. We discuss the main opportunities to speed it up on multi-core platforms, which pose new challenges for parallelisation techniques. These opportunities are developed in two general families of solutions involving both the single simulation and a bulk of independent simulations (either replicas of derived from parameter sweep). Proposed solutions are tested on the parallelisation of the CWC simulator (Calculus of Wrapped Compartments) that is carried out according to proposed solutions by way of the FastFlow programming framework making possible fast development and efficient execution on multi-cores.},
Address = {Ayia Napa, Cyprus},
Author = {Marco Aldinucci and Mario Coppo and Ferruccio Damiani and Maurizio Drocco and Massimo Torquati and Angelo Troina},
Booktitle = {Proc. of Intl. Euromicro PDP 2011: Parallel Distributed and network-based Processing},
Date-Added = {2012-02-25 01:21:25 +0000},
Date-Modified = {2013-11-24 00:37:16 +0000},
Doi = {10.1109/PDP.2011.81},
Editor = {Yiannis Cotronis and Marco Danelutto and George Angelos Papadopoulos},
Keywords = {fastflow},
Month = feb,
Pages = {318--325},
Publisher = {IEEE},
Title = {On Designing Multicore-Aware Simulators for Biological Systems},
pdf = {http://calvados.di.unipi.it/storage/paper_files/2011_ff_cwc_sim_PDP.pdf},
Year = {2011},
Bdsk-Url-1 = {http://arxiv.org/pdf/1010.2438v2},
Bdsk-Url-2 = {http://calvados.di.unipi.it/storage/paper_files/2011_ff_cwc_sim_PDP.pdf},
Bdsk-Url-3 = {http://dx.doi.org/10.1109/PDP.2011.81}}

M. Aldinucci, M. Drocco, D. Giordano, C. Spampinato, and M. Torquati, “A parallel edge preserving algorithm for salt and pepper image denoising,” Università degli Studi di Torino, Dip. di Informatica, Italy, 138/2011, 2011.
[BibTeX] [Download PDF]

@techreport{ff:denoiser:tr138-2011,
Author = {Marco Aldinucci and Maurizio Drocco and Daniela Giordano and Concetto Spampinato and Massimo Torquati},
Date-Added = {2010-12-08 19:31:00 +0100},
Date-Modified = {2013-11-24 00:36:56 +0000},
Institution = {Universit{\`a} degli Studi di Torino, Dip. di Informatica, Italy},
Keywords = {fastflow},
Month = may,
Number = {138/2011},
Title = {A Parallel Edge Preserving Algorithm for Salt and Pepper Image Denoising},
pdf = {http://calvados.di.unipi.it/storage/paper_files/2012_2phasedenoiser_ff_ipta.pdf},
Year = {2011},
Bdsk-Url-1 = {http://calvados.di.unipi.it/storage/paper_files/2012_2phasedenoiser_ff_ipta.pdf}}

M. Aldinucci, M. Danelutto, P. Kilpatrick, and V. Xhagjika, “LIBERO: a framework for autonomic management of multiple non-functional concerns,” in Euro-par 2010 workshops, proc. of the coregrid workshop on grids, clouds and p2p computing, Ischia, Italy, 2011, pp. 237-245. doi:10.1007/978-3-642-21878-1_30
[Abstract] [BibTeX] [Download PDF]

@inproceedings{libero:cgsymph:10,
Abstract = {We describe a lightweight prototype framework (LIBERO) designed for experimentation with behavioural skeletons-components implementing a well-known parallelism exploitation pattern and a rule-based autonomic manager taking care of some non-functional feature related to pattern computation. LIBERO supports multiple autonomic managers within the same behavioural skeleton, each taking care of a different non-functional concern. We introduce LIBERO -- built on plain Java and JBoss -- and discuss how multiple managers may be coordinated to achieve a common goal using a two-phase coordination protocol developed in earlier work. We present experimental results that demonstrate how the prototype may be used to investigate autonomic management of multiple, independent concerns.},
Address = {Ischia, Italy},
Author = {Marco Aldinucci and Marco Danelutto and Peter Kilpatrick and Vamir Xhagjika},
Booktitle = {Euro-Par 2010 Workshops, Proc. of the CoreGrid Workshop on Grids, Clouds and P2P Computing},
Date-Added = {2011-09-12 14:58:27 +0200},
Date-Modified = {2012-12-27 14:26:15 +0000},
Doi = {10.1007/978-3-642-21878-1_30},
Editor = {M. R. Guarracino and F. Vivien and J. L. Tr{\"a}ff and M. Cannataro and M. Danelutto and A. Hast and F. Perla and A. Kn{\"u}pfer and Di Martino, B. and M. Alexander},
Month = sep,
Pages = {237--245},
Publisher = {Springer},
Series = {LNCS},
Title = {{LIBERO}: a framework for autonomic management of multiple non-functional concerns},
pdf = {http://calvados.di.unipi.it/storage/paper_files/2011_libero_coregridworkshop2010.pdf},
Volume = {6586},
Year = {2011},
Bdsk-Url-1 = {http://calvados.di.unipi.it/storage/paper_files/2011_libero_coregridworkshop2010.pdf},
Bdsk-Url-2 = {http://dx.doi.org/10.1007/978-3-642-21878-1_30}}

M. Aldinucci, A. Bracciali, P. Liò, A. Sorathiya, and M. Torquati, “StochKit-FF: efficient systems biology on multicore architectures,” in Euro-par 2010 workshops, proc. of the 1st workshop on high performance bioinformatics and biomedicine (hibb), Ischia, Italy, 2011, pp. 167-175. doi:10.1007/978-3-642-21878-1_21
[Abstract] [BibTeX] [Download PDF]

@inproceedings{stochkit-ff:hibb:10,
Abstract = {The stochastic modelling of biological systems is an informative, and in some cases, very adequate technique, which may however result in being more expensive than other modelling approaches, such as differential equations. We present StochKit-FF, a parallel version of StochKit, a reference toolkit for stochastic simulations. StochKit-FF is based on the FastFlow programming toolkit for multicores and exploits the novel concept of selective memory. We experiment StochKit-FF on a model of HIV infection dynamics, with the aim of extracting information from efficiently run experiments, here in terms of average and variance and, on a longer term, of more structured data.},
Address = {Ischia, Italy},
Author = {Marco Aldinucci and Andrea Bracciali and Pietro Li{\`o} and Anil Sorathiya and Massimo Torquati},
Booktitle = {Euro-Par 2010 Workshops, Proc. of the 1st Workshop on High Performance Bioinformatics and Biomedicine (HiBB)},
Date-Added = {2012-04-12 11:23:46 +0000},
Date-Modified = {2013-11-24 00:36:38 +0000},
Doi = {10.1007/978-3-642-21878-1_21},
Editor = {M. R. Guarracino and F. Vivien and J. L. Tr{\"a}ff and M. Cannataro and M. Danelutto and A. Hast and F. Perla and A. Kn{\"u}pfer and Di Martino, B. and M. Alexander},
Keywords = {bioinformatics},
Month = aug,
Pages = {167--175},
Publisher = {Springer},
Series = {{LNCS}},
Title = {{StochKit-FF}: Efficient Systems Biology on Multicore Architectures},
pdf = {http://calvados.di.unipi.it/storage/paper_files/2010_stochkit-ff_hibb.pdf},
Volume = {6586},
Year = {2011},
Bdsk-Url-1 = {http://calvados.di.unipi.it/storage/paper_files/2010_stochkit-ff_hibb.pdf},
Bdsk-Url-2 = {http://dx.doi.org/10.1007/978-3-642-21878-1_21}}

M. Aldinucci, S. Ruggieri, and M. Torquati, “Porting decision tree building and pruning algorithms to multicore using FastFlow,” Università di Pisa, Dipartimento di Informatica, Italy, TR-11-06, 2011.
[BibTeX] [Download PDF]

@techreport{TR-11-06,
Author = {Marco Aldinucci and Salvatore Ruggieri and Massimo Torquati},
Date-Added = {2012-04-15 18:40:07 +0000},
Date-Modified = {2013-11-24 00:37:04 +0000},
Institution = {Universit{\`a} di Pisa, Dipartimento di Informatica, Italy},
Keywords = {fastflow},
Month = mar,
Number = {TR-11-06},
Title = {Porting Decision Tree Building and Pruning Algorithms to Multicore using {FastFlow}},
pdf = {http://compass2.di.unipi.it/TR/Files/TR-11-06.pdf.gz},
Year = {2011},
Bdsk-Url-1 = {http://compass2.di.unipi.it/TR/Files/TR-11-06.pdf.gz}}

M. Aldinucci, L. Anardu, M. Danelutto, P. Kilpatrick, and M. Torquati, “Targeting multi cores by structured programming and data flow,” Università di Pisa, Dipartimento di Informatica, Italy, TR-11-13, 2011.
[BibTeX] [Download PDF]

@techreport{TR-11-13,
Author = {Marco Aldinucci and Lorenzo Anardu and Marco Danelutto and Peter Kilpatrick and Massimo Torquati},
Date-Added = {2012-06-06 22:55:41 +0000},
Date-Modified = {2012-06-06 22:57:26 +0000},
Institution = {Universit{\`a} di Pisa, Dipartimento di Informatica, Italy},
Month = sep,
Number = {TR-11-13},
Title = {Targeting multi cores by structured programming and data flow},
pdf = {http://compass2.di.unipi.it/TR/Files/TR-11-13.pdf.gz},
Year = {2011},
Bdsk-Url-1 = {http://compass2.di.unipi.it/TR/Files/TR-11-13.pdf.gz}}

M. Danelutto, L. Deri, and D. De Sensi, “Network monitoring on multicores with algorithmic skeletons,” in Proceedings of 14th international parallel computing conference (ParCo), 2011, pp. 519-526. doi:10.3233/978-1-61499-041-3-519
[Abstract] [BibTeX] [URL] [Download PDF] [Slides]

@inproceedings{DBLP:conf/parco/DaneluttoDS11,
author = {Danelutto, Marco and Deri, Luca and De Sensi, Daniele},
title = {Network Monitoring on Multicores with Algorithmic Skeletons},
booktitle = {Proceedings of 14th International Parallel Computing Conference ({ParCo})},
pages = {519--526},
year = {2011},
crossref = {DBLP:conf/parco/2011},
pdf = {http://pages.di.unipi.it/desensi/assets/pdf/2011_ParCo.pdf},
url = {http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.231.9143},
doi = {10.3233/978-1-61499-041-3-519},
timestamp = {Tue, 28 Apr 2015 15:53:30 +0200},
abstract = {Monitoring network traffic on 10 Gbit networks requires very efficient tools capable of exploiting modern multicore computing architectures. Specialized network cards can accelerate packet capture and thus reduce the processing overhead, but they can not achieve adequate packet analysis performance. For this reason most monitoring tools cannot cope with high network speeds. We describe the design and implementation of ffProbe, a network traffic monitoring application built on top of FastFlow, combined with several optimized parallel programming patterns. We compare ffProbe with two popular network monitoring probes. The results demonstrate that it can scale significantly better with number of cores and thus may be suitable for monitoring 10 Gbit networks using commodity servers.},
slides = {http://pages.di.unipi.it/desensi/assets/pdf/2011_ParCo_Slides.pdf}
}

M. Danelutto, P. Kilpatrick, C. Montangero, and L. Semini, “Model checking support for conflict resolution in multiple non-functional concern management,” in Euro-par workshops (1), 2011, pp. 128-138.
[BibTeX]

@inproceedings{europar_DaneluttoKMS11,
author = {Marco Danelutto and Peter Kilpatrick and Carlo Montangero and Laura Semini},
title = {Model Checking Support for Conflict Resolution in Multiple Non-functional Concern Management},
booktitle = {Euro-Par Workshops (1)},
year = {2011},
pages = {128--138},
bibsource = {DBLP, http://dblp.uni-trier.de},
crossref = {DBLP:conf/europar/2011w1}
}

2010

M. Aldinucci, M. Danelutto, P. Kilpatrick, M. Meneghin, and M. Torquati, “Accelerating sequential programs using FastFlow and self-offloading,” Università di Pisa, Dipartimento di Informatica, Italy, TR-10-03, 2010.
[Abstract] [BibTeX] [Download PDF]

@techreport{fastflow_acc:tr-10-03,
  author        = {Marco Aldinucci and Marco Danelutto and Peter Kilpatrick and Massimiliano Meneghin and Massimo Torquati},
  title         = {Accelerating sequential programs using {FastFlow} and self-offloading},
  institution   = {Universit{\`a} di Pisa, Dipartimento di Informatica, Italy},
  number        = {TR-10-03},
  month         = feb,
  year          = {2010},
  pdf           = {http://calvados.di.unipi.it/storage/paper_files/TR-09-12.pdf},
  keywords      = {fastflow},
  abstract      = {Shared memory multiprocessors come back to popularity thanks to rapid spreading of commodity multi-core architectures. As ever, shared memory programs are fairly easy to write and quite hard to optimise; providing multi-core programmers with optimising tools and programming frameworks is a nowadays challenge. Few efforts have been done to support effective streaming applications on these architectures. In this paper we introduce FastFlow, a low-level programming framework based on lock-free queues explicitly designed to support high-level languages for streaming applications. We compare FastFlow with state-of-the-art programming frameworks such as Cilk, OpenMP, and Intel TBB. We experimentally demonstrate that FastFlow is always more efficient than all of them in a set of micro-benchmarks and on a real world application; the speedup edge of FastFlow over other solutions might be bold for fine grain tasks, as an example +35% on OpenMP, +226% on Cilk, +96% on TBB for the alignment of protein
P01111 against UniProt DB using Smith-Waterman algorithm.},
  date-added    = {2009-09-08 16:14:34 +0200},
  date-modified = {2013-11-24 00:39:01 +0000}
}

M. Aldinucci, S. Ruggieri, and M. Torquati, “Porting decision tree algorithms to multicore using FastFlow,” in Proc. of european conference in machine learning and knowledge discovery in databases (ecml pkdd), Barcelona, Spain, 2010, pp. 7-23. doi:10.1007/978-3-642-15880-3_7
[Abstract] [BibTeX] [Download PDF]

@inproceedings{fastflow_c45:emclpkdd,
Abstract = {The whole computer hardware industry embraced multicores. For these machines, the extreme optimisation of sequential algorithms is no longer sufficient to squeeze the real machine power, which can be only exploited via thread-level parallelism. Decision tree algorithms exhibit natural concurrency that makes them suitable to be parallelised. This paper presents an approach for easy-yet-efficient porting of an implementation of the C4.5 algorithm on multicores. The parallel porting requires minimal changes to the original sequential code, and it is able to exploit up to 7X speedup on an Intel dual-quad core machine.},
Address = {Barcelona, Spain},
Author = {Marco Aldinucci and Salvatore Ruggieri and Massimo Torquati},
Booktitle = {Proc. of European Conference in Machine Learning and Knowledge Discovery in Databases (ECML PKDD)},
Date-Added = {2010-06-15 21:03:56 +0200},
Date-Modified = {2013-11-24 00:38:07 +0000},
Doi = {10.1007/978-3-642-15880-3_7},
Editor = {Jos{\'e} L. Balc{\'a}zar and Francesco Bonchi and Aristides Gionis and Mich{\`e}le Sebag},
Keywords = {fastflow},
Month = sep,
Pages = {7--23},
Publisher = {Springer},
Series = {LNCS},
Title = {Porting Decision Tree Algorithms to Multicore using {FastFlow}},
pdf = {http://calvados.di.unipi.it/storage/paper_files/2010_c45FF_ECMLPKDD.pdf},
Volume = {6321},
Year = {2010},
Bdsk-Url-1 = {http://calvados.di.unipi.it/storage/paper_files/2010_c45FF_ECMLPKDD.pdf},
Bdsk-Url-2 = {http://dx.doi.org/10.1007/978-3-642-15880-3_7}}

M. Aldinucci, S. Ruggieri, and M. Torquati, “Porting decision tree algorithms to multicore using FastFlow,” Università di Pisa, Dipartimento di Informatica, Italy, TR-10-11, 2010.
[Abstract] [BibTeX] [Download PDF]

@techreport{fastflow_c45:tr-10-11,
Abstract = {The whole computer hardware industry embraced multicores. For these machines, the extreme optimisation of sequential algorithms is no longer sufficient to squeeze the real machine power, which can be only exploited via thread-level parallelism. Decision tree algorithms exhibit natural concurrency that makes them suitable to be parallelised. This paper presents an approach for easy-yet-efficient porting of an implementation of the C4.5 algorithm on multicores. The parallel porting requires minimal changes to the original sequential code, and it is able to exploit up to 7X speedup on an Intel dual-quad core machine.},
Author = {Marco Aldinucci and Salvatore Ruggieri and Massimo Torquati},
Date-Added = {2010-07-11 16:54:09 +0200},
Date-Modified = {2013-11-24 00:38:41 +0000},
Institution = {Universit{\`a} di Pisa, Dipartimento di Informatica, Italy},
Keywords = {fastflow},
Month = may,
Number = {TR-10-11},
Title = {Porting Decision Tree Algorithms to Multicore using {FastFlow}},
pdf = {http://calvados.di.unipi.it/storage/paper_files/TR-10-11.pdf},
Year = {2010},
Bdsk-Url-1 = {http://calvados.di.unipi.it/storage/paper_files/TR-10-11.pdf}}

M. Aldinucci, M. Danelutto, M. Meneghin, P. Kilpatrick, and M. Torquati, “Efficient streaming applications on multi-core with FastFlow: the biosequence alignment test-bed,” in Parallel computing: from multicores and gpu’s to petascale (proc. of PARCO 2009, lyon, france), Lyon, France, 2010, pp. 273-280. doi:10.3233/978-1-60750-530-3-273
[Abstract] [BibTeX] [Download PDF]

@inproceedings{fastflow:parco:09,
Abstract = {Shared-memory multi-core architectures are becoming increasingly popular. While their parallelism and peak performance is ever increasing, their efficiency is often disappointing due to memory fence overheads. In this paper we present FastFlow, a programming methodology based on lock-free queues explicitly designed for programming streaming applications on multi-cores. The potential of FastFlow is evaluated on micro-benchmarks and on the Smith-Waterman sequence alignment application, which exhibits a substantial speedup against the state-of-the-art multi-threaded implementation (SWPS3 x86/SSE2).},
Address = {Lyon, France},
Annote = {ISBN: 978-1-60750-529-7},
Author = {Marco Aldinucci and Marco Danelutto and Massimiliano Meneghin and Peter Kilpatrick and Massimo Torquati},
Booktitle = {Parallel Computing: From Multicores and GPU's to Petascale (Proc. of {PARCO 2009}, Lyon, France)},
Date-Added = {2009-06-06 01:38:30 +0200},
Date-Modified = {2013-11-24 00:39:22 +0000},
Doi = {10.3233/978-1-60750-530-3-273},
Editor = {Barbara Chapman and Fr{\'e}d{\'e}ric Desprez and Gerhard R. Joubert and Alain Lichnewsky and Frans Peters and Thierry Priol},
Keywords = {fastflow},
Pages = {273--280},
Publisher = {IOS Press},
Series = {Advances in Parallel Computing},
Title = {Efficient streaming applications on multi-core with {FastFlow}: the biosequence alignment test-bed},
pdf = {http://calvados.di.unipi.it/storage/paper_files/2009_fastflow_parco.pdf},
Volume = {19},
Year = {2010},
Bdsk-Url-1 = {http://calvados.di.unipi.it/storage/paper_files/2009_fastflow_parco.pdf},
Bdsk-Url-2 = {http://dx.doi.org/10.3233/978-1-60750-530-3-273}}

M. Aldinucci, M. Meneghin, and M. Torquati, “Efficient Smith-Waterman on multi-core with FastFlow,” in Proc. of intl. euromicro pdp 2010: parallel distributed and network-based processing, Pisa, Italy, 2010, pp. 195-199. doi:10.1109/PDP.2010.93
[Abstract] [BibTeX] [Download PDF]

@inproceedings{fastflow:pdp:10,
Abstract = {Shared memory multiprocessors have returned to popularity thanks to rapid spreading of commodity multi-core architectures. However, little attention has been paid to supporting effective streaming applications on these architectures. In this paper we describe FastFlow, a low-level programming framework based on lock-free queues explicitly designed to support high-level languages for streaming applications. We compare FastFlow with state-of-the-art programming frameworks such as Cilk, OpenMP, and Intel TBB. We experimentally demonstrate that FastFlow is always more efficient than them on a given real world application: the speedup of FastFlow over other solutions may be substantial for fine grain tasks, for example +35% over OpenMP, +226% over Cilk, +96% over TBB for the alignment of protein P01111 against UniProt DB using the Smith-Waterman algorithm.},
Address = {Pisa, Italy},
Author = {Marco Aldinucci and Massimiliano Meneghin and Massimo Torquati},
Booktitle = {Proc. of Intl. Euromicro PDP 2010: Parallel Distributed and network-based Processing},
Date-Added = {2007-10-26 01:02:32 +0200},
Date-Modified = {2013-11-24 00:38:51 +0000},
Doi = {10.1109/PDP.2010.93},
Editor = {Marco Danelutto and Tom Gross and Julien Bourgeois},
Keywords = {fastflow},
Month = feb,
Pages = {195--199},
Publisher = {IEEE},
Title = {Efficient {Smith-Waterman} on multi-core with {FastFlow}},
pdf = {http://calvados.di.unipi.it/storage/paper_files/2010_fastflow_SW_PDP.pdf},
Year = {2010},
Bdsk-Url-1 = {http://calvados.di.unipi.it/storage/paper_files/2010_fastflow_SW_PDP.pdf},
Bdsk-Url-2 = {http://dx.doi.org/10.1109/PDP.2010.93}}

M. Aldinucci, M. Coppo, F. Damiani, M. Drocco, M. Torquati, and A. Troina, “On designing multicore-aware simulators for biological systems,” Università degli Studi di Torino, Dipartimento di Informatica, Italy, 131/2010, 2010.
[BibTeX]

@techreport{ff:cwc:pdp:11-tr,
Author = {Marco Aldinucci and Mario Coppo and Ferruccio Damiani and Maurizio Drocco and Massimo Torquati and Angelo Troina},
Date-Added = {2011-05-19 19:07:36 +0200},
Date-Modified = {2013-11-24 00:38:00 +0000},
Institution = {Universit{\`a} degli Studi di Torino, Dipartimento di Informatica, Italy},
Keywords = {fastflow},
Month = oct,
Number = {131/2010},
Title = {On Designing Multicore-Aware Simulators for Biological Systems},
Year = {2010}}

M. Aldinucci, M. Danelutto, and P. Kilpatrick, “Skeletons for multi/many-core systems,” in Parallel computing: from multicores and gpu’s to petascale (proc. of PARCO 2009, lyon, france), Lyon, France, 2010, pp. 265-272. doi:10.3233/978-1-60750-530-3-265
[Abstract] [BibTeX] [Download PDF]

@inproceedings{multicoreske:parco:09,
Abstract = {We discuss how algorithmic skeletons (and structured parallel programming models in general) can be used to efficiently and seamlessly program multi-core as well as many-core systems. We introduce a new version of the muskel skeleton library that can be used to target multi/many-core systems and we present experimental results that demonstrate the feasibility of the approach. The experimental results presented also give an idea of the computational grains that can be exploited on current, state-of-the-art multi-core systems.},
Address = {Lyon, France},
Annote = {ISBN: 978-1-60750-529-7},
Author = {Marco Aldinucci and Marco Danelutto and Peter Kilpatrick},
Booktitle = {Parallel Computing: From Multicores and GPU's to Petascale (Proc. of {PARCO 2009}, Lyon, France)},
Date-Added = {2009-06-03 17:56:19 +0200},
Date-Modified = {2012-11-24 09:43:35 +0000},
Doi = {10.3233/978-1-60750-530-3-265},
Editor = {Barbara Chapman and Fr{\'e}d{\'e}ric Desprez and Gerhard R. Joubert and Alain Lichnewsky and Frans Peters and Thierry Priol},
Pages = {265--272},
Publisher = {IOS Press},
Series = {Advances in Parallel Computing},
Title = {Skeletons for multi/many-core systems},
pdf = {http://calvados.di.unipi.it/storage/paper_files/2010_muskel_multicore_parco.pdf},
Volume = {19},
Year = {2010},
Bdsk-Url-1 = {http://calvados.di.unipi.it/storage/paper_files/2010_muskel_multicore_parco.pdf},
Bdsk-Url-2 = {http://dx.doi.org/10.3233/978-1-60750-530-3-265}}

M. Aldinucci, M. Danelutto, and P. Kilpatrick, “Autonomic management of multiple non-functional concerns in behavioural skeletons,” in Grids, p2p and services computing, F. Desprez, V. Getov, T. Priol, and R. Yahyapour, Eds., Springer, 2010, pp. 89-103. doi:10.1007/978-1-4419-6794-7_8
[Abstract] [BibTeX] [Download PDF]

@incollection{multiple-nf-concern:cgsymph:09:book,
Abstract = {We introduce and address the problem of concurrent autonomic management of different non-functional concerns in parallel applications build as a hierarchical composition of behavioural skeletons. We first define the problems arising when multiple concerns are dealt with by independent managers, then we propose a methodology supporting coordinated management, and finally we discuss how autonomic management of multiple concerns may be implemented in a typical use case. Being based on the behavioural skeleton concept proposed in the CoreGRID GCM, it is anticipated that the methodology will be readily integrated into the current reference implementation of GCM based on Java ProActive and running on top of major grid middleware systems.},
Annote = {ISBN: 978-1-4419-6793-0 (Proc. of the CoreGRID Symposium 2009)},
Author = {Marco Aldinucci and Marco Danelutto and Peter Kilpatrick},
Booktitle = {Grids, P2P and Services Computing},
Date-Added = {2009-06-30 12:24:06 +0200},
Date-Modified = {2012-02-25 00:39:47 +0000},
Doi = {10.1007/978-1-4419-6794-7_8},
Editor = {Fr{\'e}d{\'e}ric Desprez and Vladimir Getov and Thierry Priol and Ramin Yahyapour},
Month = aug,
Pages = {89--103},
Publisher = {Springer},
Series = {CoreGRID},
Title = {Autonomic Management of Multiple Non-Functional Concerns in Behavioural Skeletons},
pdf = {http://calvados.di.unipi.it/storage/paper_files/2009_CGSymph_Autonomic_BeSke.pdf},
Year = {2010},
Bdsk-Url-1 = {http://calvados.di.unipi.it/storage/paper_files/2009_CGSymph_Autonomic_BeSke.pdf},
Bdsk-Url-2 = {http://dx.doi.org/10.1007/978-1-4419-6794-7_8}}

M. Aldinucci, A. Bracciali, P. Liò, A. Sorathiya, and M. Torquati, “StochKit-FF: efficient systems biology on multicore architectures,” Università di Pisa, Dipartimento di Informatica, Italy, TR-10-12, 2010. doi:10.1007/978-3-642-21878-1_21
[Abstract] [BibTeX] [Download PDF]

@techreport{stochkit-ff:tr-10-12,
Abstract = {The stochastic modelling of biological systems is an informative, and in some cases, very adequate technique, which may however result in being more expensive than other modelling approaches, such as differential equations. We present StochKit-FF, a parallel version of StochKit, a reference toolkit for stochastic simulations. StochKit-FF is based on the FastFlow programming toolkit for multicores and exploits the novel concept of selective memory. We experiment StochKit-FF on a model of HIV infection dynamics, with the aim of extracting information from efficiently run experiments, here in terms of average and variance and, on a longer term, of more structured data.},
Author = {Marco Aldinucci and Andrea Bracciali and Pietro Li{\`o} and Anil Sorathiya and Massimo Torquati},
Date-Added = {2010-06-27 16:39:46 +0200},
Date-Modified = {2013-11-24 00:38:32 +0000},
Doi = {10.1007/978-3-642-21878-1_21},
Institution = {Universit{\`a} di Pisa, Dipartimento di Informatica, Italy},
Keywords = {fastflow},
Month = jul,
Number = {TR-10-12},
Title = {{StochKit-FF}: Efficient Systems Biology on Multicore Architectures},
pdf = {http://calvados.di.unipi.it/storage/paper_files/TR-10-12.pdf},
Year = {2010},
Bdsk-Url-1 = {http://calvados.di.unipi.it/storage/paper_files/TR-10-12.pdf},
Bdsk-Url-2 = {http://dx.doi.org/10.1007/978-3-642-21878-1_21}}

C. Bertolli, D. Buono, G. Mencagli, M. Torquati, M. Vanneschi, M. Mordacchini, and F. M. Nardini, “Resource discovery support for time-critical adaptive applications,” in Proc. of intl. workshop on emergency management: communication and computing platforms, Caen, France, 2010.
[BibTeX] [Download PDF]

@inproceedings{assistant:iwcmc:10,
  author        = {Carlo Bertolli and Daniele Buono and Gabriele Mencagli and Massimo Torquati and Marco Vanneschi and Matteo Mordacchini and Franco Maria Nardini},
  title         = {Resource Discovery Support for Time-Critical Adaptive Applications},
  booktitle     = {Proc. of Intl. Workshop on Emergency Management: Communication and Computing Platforms},
  publisher     = {ACM},
  address       = {Caen, France},
  month         = jul,
  year          = {2010},
  pdf           = {http://calvados.di.unipi.it/storage/paper_files/2010_ASSISTANT_IWCMC2010_EMCCP.pdf},
  date-added    = {2010-06-27 20:39:21 +0200},
  date-modified = {2010-06-27 20:52:28 +0200},
  bdsk-url-1    = {http://calvados.di.unipi.it/storage/paper_files/2010_ASSISTANT_IWCMC2010_EMCCP.pdf},
}

D. Buono, M. Danelutto, and S. Lametti, “Map, reduce and mapreduce, the skeleton way,” Procedia cs, vol. 1, iss. 1, pp. 2095-2103, 2010.
[BibTeX]

@article{DBLP:journals/procedia/BuonoDL10,
Author = {Daniele Buono and Marco Danelutto and Silvia Lametti},
Bibsource = {DBLP, http://dblp.uni-trier.de},
Doi = {10.1016/j.procs.2010.04.234},
Ee = {http://dx.doi.org/10.1016/j.procs.2010.04.234},
Journal = {Procedia Computer Science},
Number = {1},
Pages = {2095--2103},
Title = {Map, reduce and mapreduce, the skeleton way},
Volume = {1},
Year = {2010}}

M. Torquati, “Single-producer/single-consumer queues on shared cache multi-core systems,” Università di Pisa, Dipartimento di Informatica, Italy, TR-10-20, 2010.
[BibTeX] [Download PDF]

@techreport{ff:ubuffer:pdp:11,
Author = {Massimo Torquati},
Date-Added = {2010-10-25 16:30:17 +0200},
Date-Modified = {2013-11-24 00:37:32 +0000},
Institution = {Universit{\`a} di Pisa, Dipartimento di Informatica, Italy},
Keywords = {fastflow},
Month = dec,
Number = {TR-10-20},
Title = {Single-Producer/Single-Consumer Queues on Shared Cache Multi-Core Systems},
pdf = {http://compass2.di.unipi.it/TR/Files/TR-10-20.pdf.gz},
Year = {2010},
Bdsk-Url-1 = {http://compass2.di.unipi.it/TR/Files/TR-10-20.pdf.gz}}

T. Weigold, M. Aldinucci, M. Danelutto, and V. Getov, “Integrating autonomic grid components and process-driven business applications,” in Autonomic computing and communications systems third international icst conference, autonomics 2009, limassol, cyprus, september 9-11, 2009, revised selected papers, Limassol, Cyprus, 2010, pp. 98-113. doi:10.1007/978-3-642-11482-3_7
[Abstract] [BibTeX] [Download PDF]

@inproceedings{ibm:autonomics:09,
Abstract = {Today's business applications are increasingly process driven, meaning that the main application logic is executed by a dedicate process engine. In addition, component-oriented software development has been attracting attention for building complex distributed applications. In this paper we present the experiences gained from building a process-driven biometric identification application which makes use of Grid infrastructures via the Grid Component Model (GCM). GCM, besides guaranteeing access to Grid resources, supports autonomic management of notable parallel composite components. This feature is exploited within our biometric identification application to ensure real time identification of fingerprints. Therefore, we briefly introduce the GCM framework and the process engine used, and we describe the implementation of the application using autonomic GCM components. Finally, we summarize the results, experiences, and lessons learned focusing on the integration of autonomic GCM components and the process-driven approach.},
Address = {Limassol, Cyprus},
Annote = {ISBN: 978-3-642-11481-6},
Author = {Thomas Weigold and Marco Aldinucci and Marco Danelutto and Vladimir Getov},
Booktitle = {Autonomic Computing and Communications Systems Third International ICST Conference, Autonomics 2009, Limassol, Cyprus, September 9-11, 2009, Revised Selected Papers},
Date-Added = {2010-02-13 16:13:10 +0100},
Date-Modified = {2012-11-24 09:44:22 +0000},
Doi = {10.1007/978-3-642-11482-3_7},
Editor = {Athanasios V. Vasilakos and Roberto Beraldi and Roy Friedman and Marco Mamei},
Issn = {1867-8211},
Pages = {98--113},
Publisher = {Springer},
Series = {{Lecture Notes of the Institute for Computer Sciences, Social-Informatics and Telecommunications Engineering (LNICST)}},
Title = {Integrating Autonomic Grid Components and Process-Driven Business Applications},
pdf = {http://calvados.di.unipi.it/storage/paper_files/2010_BS_autonomics09.pdf},
Volume = {23},
Year = {2010},
Bdsk-Url-1 = {http://dx.doi.org/10.1007/978-3-642-11482-3_7},
Bdsk-Url-2 = {http://calvados.di.unipi.it/storage/paper_files/2010_BS_autonomics09.pdf}}

2009

M. Aldinucci, M. Danelutto, and P. Kilpatrick, “Co-design of distributed systems using skeletons and autonomic management abstractions,” in Euro-par 2008 workshops – parallel processing, selected papers, Las Palmas, Spain, 2009, pp. 403-414. doi:10.1007/978-3-642-00955-6_46
[Abstract] [BibTeX] [Download PDF]

@inproceedings{abstraction:europarworkshop:09,
Abstract = {We discuss how common problems arising with multi/many-core distributed architectures can be effectively handled through co-design of parallel/distributed programming abstractions and of autonomic management of non-functional concerns. In particular, we demonstrate how restricted parallel/distributed patterns (or skeletons) may be efficiently managed by rule-based autonomic managers. We discuss the basic principles underlying pattern+manager co-design, current implementations inspired by this approach and some results achieved with a proof-of-concept prototype.},
Address = {Las Palmas, Spain},
Author = {Marco Aldinucci and Marco Danelutto and Peter Kilpatrick},
Booktitle = {Euro-Par 2008 Workshops - Parallel Processing, Selected Papers},
Date-Added = {2009-01-09 17:57:45 +0100},
Date-Modified = {2009-06-26 16:12:56 +0200},
Doi = {10.1007/978-3-642-00955-6_46},
Editor = {E. C{\'e}sar and M. Alexander and A. Streit and J. L. Tr{\"a}ff and C. C{\'e}rin and A. Kn{\"u}pfer and D. Kranzlm{\"u}ller and S. Jha},
Isbn = {978-3-642-00954-9},
Month = apr,
Pages = {403--414},
Publisher = {Springer},
Series = {LNCS},
Title = {Co-design of distributed systems using skeletons and autonomic management abstractions},
pdf = {http://calvados.di.unipi.it/storage/paper_files/2009_abstraction_workshopeuropar.pdf},
Volume = {5415},
Year = {2009},
Bdsk-Url-1 = {http://calvados.di.unipi.it/storage/paper_files/2009_abstraction_workshopeuropar.pdf},
Bdsk-Url-2 = {http://dx.doi.org/10.1007/978-3-642-00955-6_46}}

M. Aldinucci, M. Danelutto, and P. Kilpatrick, “Autonomic management of non-functional concerns in distributed and parallel application programming,” in Proc. of intl. parallel & distributed processing symposium (ipdps), Rome, Italy, 2009, pp. 1-12. doi:10.1109/IPDPS.2009.5161034
[Abstract] [BibTeX] [Download PDF]

@inproceedings{beske:ipdps:09,
Abstract = {An approach to the management of non-functional concerns in massively parallel and/or distributed architectures that marries parallel programming patterns with autonomic computing is presented. The necessity and suitability of the adoption of autonomic techniques are evidenced. Issues arising in the implementation of autonomic managers taking care of multiple concerns and of coordination among hierarchies of such autonomic managers are discussed. Experimental results are presented that demonstrate the feasibility of the approach.},
Address = {Rome, Italy},
Author = {Marco Aldinucci and Marco Danelutto and Peter Kilpatrick},
Booktitle = {Proc. of Intl. Parallel \& Distributed Processing Symposium (IPDPS)},
Date-Added = {2008-12-09 18:58:37 +0100},
Date-Modified = {2009-06-07 22:30:35 +0200},
Doi = {10.1109/IPDPS.2009.5161034},
Month = may,
Pages = {1--12},
Publisher = {IEEE},
Title = {Autonomic management of non-functional concerns in distributed and parallel application programming},
pdf = {http://calvados.di.unipi.it/storage/paper_files/2009_f_nf_IPDPS.pdf},
Year = {2009},
Bdsk-Url-1 = {http://calvados.di.unipi.it/storage/paper_files/2009_f_nf_IPDPS.pdf},
Bdsk-Url-2 = {http://dx.doi.org/10.1109/IPDPS.2009.5161034}}

M. Aldinucci, M. Danelutto, and P. Kilpatrick, “Towards hierarchical management of autonomic components: a case study,” in Proc. of intl. euromicro pdp 2009: parallel distributed and network-based processing, Weimar, Germany, 2009, pp. 3-10. doi:10.1109/PDP.2009.48
[Abstract] [BibTeX] [Download PDF]

@inproceedings{beske:pdp:09,
Abstract = {We address the issue of autonomic management in hierarchical component-based distributed systems. The long term aim is to provide a modeling framework for autonomic management in which QoS goals can be defined, plans for system adaptation described and proofs of achievement of goals by (sequences of) adaptations furnished. Here we present an early step on this path. We restrict our focus to skeleton-based systems in order to exploit their well-defined structure. The autonomic cycle is described using the Orc system orchestration language while the plans are presented as structural modifications together with associated costs and benefits. A case study is presented to illustrate the interaction of managers to maintain QoS goals for throughput under varying conditions of resource availability.},
Address = {Weimar, Germany},
Author = {Marco Aldinucci and Marco Danelutto and Peter Kilpatrick},
Booktitle = {Proc. of Intl. Euromicro PDP 2009: Parallel Distributed and network-based Processing},
Date-Added = {2008-10-15 22:43:41 +0200},
Date-Modified = {2009-05-20 10:26:13 +0200},
Doi = {10.1109/PDP.2009.48},
Editor = {Didier El Baz and Tom Gross and Francois Spies},
Month = feb,
Pages = {3--10},
Publisher = {IEEE},
Title = {Towards hierarchical management of autonomic components: a case study},
pdf = {http://calvados.di.unipi.it/storage/paper_files/2009_hier_man_PDP.pdf},
Year = {2009},
Bdsk-Url-1 = {http://calvados.di.unipi.it/storage/paper_files/2009_hier_man_PDP.pdf},
Bdsk-Url-2 = {http://dx.doi.org/10.1109/PDP.2009.48}}

M. Aldinucci, M. Torquati, and M. Meneghin, “FastFlow: efficient parallel streaming applications on multi-core,” Università di Pisa, Dipartimento di Informatica, Italy, TR-09-12, 2009.
[Abstract] [BibTeX] [URL] [Download PDF]

@techreport{fastflow:tr-09-12,
  author        = {Marco Aldinucci and Massimo Torquati and Massimiliano Meneghin},
  title         = {{FastFlow}: Efficient Parallel Streaming Applications on Multi-core},
  institution   = {Universit{\`a} di Pisa, Dipartimento di Informatica, Italy},
  number        = {TR-09-12},
  month         = sep,
  year          = {2009},
  keywords      = {fastflow},
  url           = {http://arxiv.org/abs/0909.1187},
  pdf           = {http://calvados.di.unipi.it/storage/paper_files/TR-09-12.pdf},
  abstract      = {Shared memory multiprocessors come back to popularity thanks to rapid spreading of commodity multi-core architectures. As ever, shared memory programs are fairly easy to write and quite hard to optimise; providing multi-core programmers with optimising tools and programming frameworks is a nowadays challenge. Few efforts have been done to support effective streaming applications on these architectures. In this paper we introduce FastFlow, a low-level programming framework based on lock-free queues explicitly designed to support high-level languages for streaming applications. We compare FastFlow with state-of-the-art programming frameworks such as Cilk, OpenMP, and Intel TBB. We experimentally demonstrate that FastFlow is always more efficient than all of them in a set of micro-benchmarks and on a real world application; the speedup edge of FastFlow over other solutions might be bold for fine grain tasks, as an example +35% on OpenMP, +226% on Cilk, +96% on TBB for the alignment of protein
P01111 against UniProt DB using Smith-Waterman algorithm.},
  date-added    = {2010-02-13 16:20:18 +0100},
  date-modified = {2013-11-24 00:39:38 +0000},
  bdsk-url-2    = {http://arxiv.org/abs/0909.1187},
}

M. Aldinucci and M. Torquati, “FastFlow website,” 2009.
[BibTeX]

@manual{fastflow:web,
Author = {Marco Aldinucci and Massimo Torquati},
Date-Added = {2009-10-22 17:57:29 +0200},
Date-Modified = {2010-07-07 01:26:23 +0200},
Note = {\url{http://mc-fastflow.sourceforge.net/}},
Title = {{FastFlow} website},
Year = {2009}}

M. Aldinucci, M. Danelutto, and P. Kilpatrick, “Autonomic management of multiple non-functional concerns in behavioural skeletons,” Università di Pisa, Dipartimento di Informatica, Italy, TR-09-10, 2009.
[BibTeX] [URL] [Download PDF]

@techreport{nf-concerns:tr-09-10,
  author        = {Marco Aldinucci and Marco Danelutto and Peter Kilpatrick},
  title         = {Autonomic management of multiple non-functional concerns in behavioural skeletons},
  institution   = {Universit{\`a} di Pisa, Dipartimento di Informatica, Italy},
  number        = {TR-09-10},
  month         = jul,
  year          = {2009},
  url           = {http://arxiv.org/abs/0909.1517},
  pdf           = {http://compass2.di.unipi.it/TR/Files/TR-09-10.pdf.gz},
  date-added    = {2009-09-25 22:49:07 +0200},
  date-modified = {2013-12-08 14:58:33 +0000},
  bdsk-url-2    = {http://arxiv.org/abs/0909.1517},
}

M. Aldinucci, M. Danelutto, and P. Kilpatrick, “Semi-formal models to support program development: autonomic management within component based parallel and distributed programming,” in Formal methods for components and objects: 7th intl. symposium, fmco 2008, sophia-antipolis, france, october 20 – 24, 2008, revised lectures, 2009, pp. 204-225. doi:10.1007/978-3-642-04167-9
[Abstract] [BibTeX] [Download PDF]

@inproceedings{semi-formal:fmco:09,
Abstract = {Functional and non-functional concerns require different programming effort, different techniques and different methodologies when attempting to program efficient parallel/distributed applications. In this work we present a ``programmer oriented'' methodology based on formal tools that permits reasoning about parallel/distributed program development and refinement. The proposed methodology is semi-formal in that it does not require the exploitation of highly formal tools and techniques, while providing a palatable and effective support to programmers developing parallel/distributed applications, in particular when handling non-functional concerns.},
Author = {Marco Aldinucci and Marco Danelutto and Peter Kilpatrick},
Booktitle = {Formal Methods for Components and Objects: 7th Intl. Symposium, FMCO 2008, Sophia-Antipolis, France, October 20 - 24, 2008, Revised Lectures},
Date-Added = {2009-06-07 16:05:13 +0200},
Date-Modified = {2009-08-30 17:11:01 +0200},
Doi = {10.1007/978-3-642-04167-9},
Editor = {Frank S. de Boer and Marcello M. Bonsangue and Eric Madelaine},
Pages = {204--225},
Publisher = {Springer},
Series = {LNCS},
Title = {Semi-formal models to support program development: autonomic management within component based parallel and distributed programming},
pdf = {http://calvados.di.unipi.it/storage/paper_files/2009_semiformal_FMCO08.pdf},
Volume = {5751},
Year = {2009},
Bdsk-Url-1 = {http://calvados.di.unipi.it/storage/paper_files/2009_semiformal_FMCO08.pdf},
Bdsk-Url-2 = {http://dx.doi.org/10.1007/978-3-642-04167-9}}

M. Aldinucci, H. L. Bouziane, M. Danelutto, and C. Pérez, “STKM on SCA: a unified framework with components, workflows and algorithmic skeletons,” in Proc. of 15th intl. euro-par 2009 parallel processing, Delft, The Netherlands, 2009, pp. 678-690. doi:10.1007/978-3-642-03869-3
[Abstract] [BibTeX] [Download PDF]

@inproceedings{stkm:europar:09,
Abstract = {This paper investigates an implementation of STKM, a Spatio-Temporal sKeleton Model. STKM expands the Grid Component Model (GCM) with an innovative programmable approach that allows programmers to compose an application by combining component, workflow and skeleton concepts. The paper deals with a projection of the STKM model on top of SCA and it evaluates its implementation using Tuscany Java SCA. Experimental results show the need and the benefits of the high level of abstraction offered by STKM.},
Address = {Delft, The Netherlands},
Author = {Marco Aldinucci and Hinde Lilia Bouziane and Marco Danelutto and Christian P{\'e}rez},
Booktitle = {Proc. of 15th Intl. Euro-Par 2009 Parallel Processing},
Date-Modified = {2009-12-03 00:58:56 +0100},
Doi = {10.1007/978-3-642-03869-3},
Month = aug,
Pages = {678--690},
Publisher = {Springer},
Series = {LNCS},
Title = {{STKM} on {SCA}: a Unified Framework with Components, Workflows and Algorithmic Skeletons},
pdf = {http://calvados.di.unipi.it/storage/paper_files/2009_STKM_Europar.pdf},
Volume = {5704},
Year = {2009},
Bdsk-Url-1 = {http://calvados.di.unipi.it/storage/paper_files/2009_STKM_Europar.pdf},
Bdsk-Url-2 = {http://dx.doi.org/10.1007/978-3-642-03869-3}}

G. Antichi, C. Callegari, A. Di Pietro, D. Ficara, S. Giordano, F. Vitucci, M. Meneghin, M. Torquati, M. Vanneschi, and M. Coppola, “A high level development, modeling and simulation methodology for complex multicore network processors,” in Proc. of the intl. symposium on performance evaluation of computer and telecommunication systems (spects), Istanbul, Turkey, 2009.
[BibTeX]

@inproceedings{frimp:networkproc:09,
Address = {Istanbul, Turkey},
Author = {Gianni Antichi and Christian Callegari and Di Pietro, A. and Domenico Ficara and Stefano Giordano and Fabio Vitucci and Massimiliano Meneghin and Massimo Torquati and Marco Vanneschi and Massimo Coppola},
Booktitle = {Proc. of the Intl. Symposium on Performance Evaluation of Computer and Telecommunication Systems (SPECTS)},
Date-Added = {2009-09-07 16:54:39 +0200},
Date-Modified = {2009-09-07 17:02:38 +0200},
Keywords = {incomplete},
Month = jul,
Publisher = {IEEE},
Title = {A High Level Development, Modeling and Simulation Methodology for Complex Multicore Network Processors},
Year = {2009}}

C. Bertolli, D. Buono, G. Mencagli, and M. Vanneschi, “Expressing adaptivity and context awareness in the ASSISTANT programming model,” in Proc. of autonomics: 3rd intl. icst conference on autonomic computing and communication systems, Limassol, Cyprus, 2009, pp. 32-47. doi:10.1007/978-3-642-11482-3
[BibTeX]

@inproceedings{assistant:autonomics:09,
Address = {Limassol, Cyprus},
Annote = {ISBN: 978-3-642-11481-6},
Author = {Carlo Bertolli and Daniele Buono and Gabriele Mencagli and Marco Vanneschi},
Booktitle = {Proc. of Autonomics: 3rd Intl. ICST Conference on Autonomic Computing and Communication Systems},
Date-Added = {2009-09-07 17:09:04 +0200},
Date-Modified = {2010-05-23 22:45:49 +0200},
Doi = {10.1007/978-3-642-11482-3},
Editor = {Athanasios V. Vasilakos and Roberto Beraldi and Roy Friedman and Marco Mamei},
Month = sep,
Pages = {32--47},
Publisher = {Springer},
Series = {{Lecture Notes of the Institute for Computer Sciences, Social-Informatics and Telecommunications Engineering (LNICST)}},
Title = {Expressing Adaptivity and Context Awareness in the {ASSISTANT} Programming Model},
Volume = {23},
Year = {2009},
Bdsk-Url-1 = {http://dx.doi.org/10.1007/978-3-642-11482-3}}

R. Fantacci, M. Vanneschi, C. Bertolli, G. Mencagli, and D. Tarchi, “Next generation grids and wireless communication networks: towards a novel integrated approach,” Wireless communications and mobile computing, vol. 9, iss. 4, pp. 445-467, 2009. doi:10.1002/wcm.689
[BibTeX]

@article{insyeme:j:09,
Author = {Romano Fantacci and Marco Vanneschi and Carlo Bertolli and Gabriele Mencagli and Daniele Tarchi},
Date-Added = {2008-10-31 16:05:51 +0100},
Date-Modified = {2009-06-07 22:27:59 +0200},
Doi = {10.1002/wcm.689},
Journal = {Wireless Communications and Mobile Computing},
Number = {4},
Pages = {445--467},
Title = {Next generation grids and wireless communication networks: towards a novel integrated approach},
Volume = {9},
Year = {2009},
Bdsk-Url-1 = {http://dx.doi.org/10.1002/wcm.689}}

2008

M. Aldinucci, G. Antoniu, M. Danelutto, and M. Jan, “Fault-tolerant data sharing for high-level grid programming: a hierarchical storage architecture,” in Achievements in european research on grid systems, M. Bubak, S. Gorlatch, and T. Priol, Eds., Kraków, Poland: Springer, 2008, pp. 67-81. doi:10.1007/978-0-387-72812-4_6
[Abstract] [BibTeX] [Download PDF]

@incollection{assist:juxmem:IW_book:07,
Abstract = {Enabling high-level programming models on grids is today a major challenge. A way to achieve this goal relies on the use of environments able to transparently and automatically provide adequate support for low-level, grid-specific issues (fault-tolerance, scalability, etc.). This paper discusses the above approach when applied to grid data management. As a case study, we propose a 2-tier software architecture that supports transparent, fault-tolerant, grid-level data sharing in the ASSIST programming environment (University of Pisa), based on the JuxMem grid data sharing service (INRIA Rennes).},
Address = {Krak{\'o}w, Poland},
Author = {Marco Aldinucci and Gabriel Antoniu and Marco Danelutto and Mathieu Jan},
Booktitle = {Achievements in European Research on Grid Systems},
Date-Added = {2007-06-26 01:31:31 +0200},
Date-Modified = {2012-11-18 17:45:08 +0000},
Doi = {10.1007/978-0-387-72812-4_6},
Editor = {Marian Bubak and Sergei Gorlatch and Thierry Priol},
Isbn = {978-0-387-72811-7},
Month = nov,
Pages = {67--81},
Publisher = {Springer},
Series = {CoreGRID},
Title = {Fault-Tolerant Data Sharing for High-level Grid Programming: A Hierarchical Storage Architecture},
pdf = {http://calvados.di.unipi.it/storage/paper_files/2007_IW06_book_juxadhocmem.pdf},
Year = {2008},
Bdsk-Url-1 = {http://calvados.di.unipi.it/storage/paper_files/2007_IW06_book_juxadhocmem.pdf},
Bdsk-Url-2 = {http://dx.doi.org/10.1007/978-0-387-72812-4_6}}

M. Aldinucci, S. Campa, M. Danelutto, P. Dazzi, P. Kilpatrick, D. Laforenza, and N. Tonellotto, “Behavioural skeletons for component autonomic management on grids,” in Making grids work, M. Danelutto, P. Frangopoulou, and V. Getov, Eds., Springer, 2008, pp. 3-16. doi:10.1007/978-0-387-78448-9_1
[Abstract] [BibTeX] [Download PDF]

@incollection{beske:cg_book:08,
Abstract = {Autonomic management can improve the QoS provided by parallel/distributed applications. Within the CoreGRID Component Model, the autonomic management is tailored to the automatic -- monitoring-driven -- alteration of the component assembly and, therefore, is defined as the effect of (distributed) management code.
This work yields a semantics based on hypergraph rewriting suitable to model the dynamic evolution and non-functional aspects of Service Oriented Architectures and component-based autonomic applications. In this regard, our main goal is to provide a formal description of adaptation operations that are typically only informally specified. We advocate that our approach makes easier to raise the level of abstraction of management code in autonomic and adaptive applications.},
Author = {Marco Aldinucci and Sonia Campa and Marco Danelutto and Patrizio Dazzi and Peter Kilpatrick and Domenico Laforenza and Nicola Tonellotto},
Booktitle = {Making Grids Work},
Chapter = {Component Programming Models},
Date-Added = {2007-12-09 22:26:46 +0100},
Date-Modified = {2008-11-17 20:07:48 +0100},
Doi = {10.1007/978-0-387-78448-9_1},
Editor = {Marco Danelutto and Paraskevi Frangopoulou and Vladimir Getov},
Isbn = {978-0-387-78447-2},
Month = aug,
Pages = {3--16},
Publisher = {Springer},
Series = {CoreGRID},
Title = {Behavioural skeletons for component autonomic management on grids},
pdf = {http://calvados.di.unipi.it/storage/paper_files/2007_beske_cg_crete_book.pdf},
Year = {2008},
Bdsk-Url-1 = {http://calvados.di.unipi.it/storage/paper_files/2007_beske_cg_crete_book.pdf},
Bdsk-Url-2 = {http://dx.doi.org/10.1007/978-0-387-78448-9_1}}

M. Aldinucci, M. Torquati, M. Vanneschi, and P. Zuccato, “The VirtuaLinux storage abstraction layer for efficient virtual clustering,” in Proc. of intl. euromicro pdp 2008: parallel distributed and network-based processing, Toulouse, France, 2008, pp. 619-627. doi:10.1109/PDP.2008.86
[Abstract] [BibTeX] [Download PDF]

@inproceedings{vlinux:pdp:08,
Abstract = {VirtuaLinux is a meta-distribution that enables a standard Linux distribution to support robust physical and virtualized clusters. VirtuaLinux helps in avoiding the "single point of failure" effect by means of a combination of architectural strategies, including the transparent support for disk-less and master-less cluster configuration. VirtuaLinux supports the creation and management of Virtual Clusters in seamless way: VirtuaLinux Virtual Cluster Manager enables the system administrator to create, save, restore Xen-based Virtual Clusters, and to map and dynamically re-map them onto the nodes of the physical cluster. In this paper we introduce and discuss VirtuaLinux virtualization architecture, features, and tools, and in particular, the novel disk abstraction layer, which permits the fast and space-efficient creation of Virtual Clusters.},
Address = {Toulouse, France},
Author = {Marco Aldinucci and Massimo Torquati and Marco Vanneschi and Pierfrancesco Zuccato},
Booktitle = {Proc. of Intl. Euromicro PDP 2008: Parallel Distributed and network-based Processing},
Date-Added = {2009-11-10 01:29:09 +0100},
Date-Modified = {2009-11-10 01:29:09 +0100},
Doi = {10.1109/PDP.2008.86},
Editor = {Didier El Baz and Julien Bourgeois and Francois Spies},
Month = feb,
Pages = {619--627},
Publisher = {IEEE},
Title = {The {VirtuaLinux} Storage Abstraction Layer for Efficient Virtual Clustering},
pdf = {http://calvados.di.unipi.it/storage/paper_files/2008_VirtuaLinux_PDP.pdf},
Year = {2008},
Bdsk-Url-1 = {http://dx.doi.org/10.1109/PDP.2008.86},
Bdsk-Url-2 = {http://calvados.di.unipi.it/storage/paper_files/2008_VirtuaLinux_PDP.pdf}}

M. Aldinucci, M. Danelutto, M. Torquati, F. Polzella, G. Spinatelli, M. Vanneschi, A. Gervaso, M. Cacitti, and P. Zuccato, “VirtuaLinux: virtualized high-density clusters with no single point of failure,” in Parallel computing: architectures, algorithms and applications, The Netherlands, 2008, pp. 355-362.
[Abstract] [BibTeX] [Download PDF]

@inproceedings{virtualinux:parco:07,
Abstract = {VirtuaLinux is a Linux meta-distribution that allows the creation, deployment and administration of both physical and virtualized clusters with no single point of failure. VirtuaLinux supports the creation and management of virtual clusters in seamless way: VirtuaLinux Virtual Cluster Manager enables the system administrator to create, save, restore Xen-based virtual clusters, and to map and dynamically remap them onto the nodes of the physical cluster. We introduces and discuss VirtuaLinux virtualization architecture, features, and tools. These rely on a novel disk abstraction layer, which enables the fast, space-efficient, dynamic creation of virtual clusters composed of fully independent complete virtual machines.},
Address = {The Netherlands},
Annote = {Parco 2007},
Author = {Marco Aldinucci and Marco Danelutto and Massimo Torquati and Francesco Polzella and Gianmarco Spinatelli and Marco Vanneschi and Alessandro Gervaso and Manuel Cacitti and Pierfrancesco Zuccato},
Booktitle = {Parallel Computing: Architectures, Algorithms and Applications},
Date-Added = {2007-06-26 01:43:08 +0200},
Date-Modified = {2012-11-18 17:56:09 +0000},
Editor = {C. Bischof and M. B{\"u}cker and P. Gibbon and G. R. Joubert and T. Lippert and B. Mohr and F. J. Peters},
Isbn = {9781586037963},
Pages = {355--362},
Publisher = {IOS press},
Series = {ADVANCES IN PARALLEL COMPUTING},
Title = {{VirtuaLinux}: virtualized high-density clusters with no single point of failure},
pdf = {http://calvados.di.unipi.it/storage/paper_files/2007_vlinux_parco.pdf},
Volume = {15},
Year = {2008},
Bdsk-Url-1 = {http://calvados.di.unipi.it/storage/paper_files/2007_vlinux_parco.pdf}}

M. Aldinucci, M. Danelutto, H. L. Bouziane, and C. Pérez, “Towards software component assembly language enhanced with workflows and skeletons,” in Proc. of the acm sigplan component-based high performance computing (cbhpc), New York, NY, USA, 2008, pp. 1-11. doi:10.1145/1456190.1456194
[Abstract] [BibTeX] [Download PDF]

@inproceedings{stkm:CBHPC:08,
Abstract = {We explore the possibilities offered by a programming model supporting components, workflows and skeletons. In particular we describe how Stcm (Spatio-Temporal Component Model), an already existing programming model supporting components and workflows, can be extended to also provide algorithmic skeleton concepts. Programmers are therefore enabled to assembly applications specifying both temporal and spatial relations among components and instantiating predefined skeleton composite components to implement all those application parts that can be easily modeled with the available skeletons. We discuss preliminary results as well as the benefits deriving from Stkm (Spatio-Temporal sKeleton Model) adoption in a couple of real applications.},
Address = {New York, NY, USA},
Author = {Aldinucci, Marco and Danelutto, Marco and Bouziane, Hinde Lilia and P{\'e}rez, Christian},
Booktitle = {Proc. of the ACM SIGPLAN Component-Based High Performance Computing (CBHPC)},
Date-Modified = {2008-11-17 18:33:20 +0100},
Doi = {10.1145/1456190.1456194},
Isbn = {978-1-60558-311-2},
Location = {Karlsruhe, Germany},
Month = oct,
Pages = {1--11},
Publisher = {ACM},
Title = {Towards Software Component Assembly Language Enhanced with Workflows and Skeletons},
pdf = {http://calvados.di.unipi.it/storage/paper_files/2008_CBHPC.pdf},
Year = {2008},
Bdsk-Url-1 = {http://calvados.di.unipi.it/storage/paper_files/2008_CBHPC.pdf},
Bdsk-Url-2 = {http://dx.doi.org/10.1145/1456190.1456194}}

M. Aldinucci and M. Danelutto, “Securing skeletal systems with limited performance penalty: the Muskel experience,” Journal of systems architecture, vol. 54, iss. 9, pp. 868-876, 2008. doi:10.1016/j.sysarc.2008.02.008
[Abstract] [BibTeX] [Download PDF]

@article{security:jsa:07,
Abstract = {Algorithmic skeletons have been exploited to implement several parallel programming environments, targeting workstation clusters as well as workstation networks and computational grids. When targeting non-dedicated clusters, workstation networks and grids, security has to be taken adequately into account in order to guarantee both code and data confidentiality and integrity. However, introducing security is usually an expensive activity, both in terms of the effort required to managed security mechanisms and in terms of the time spent performing security related activities at run time. We discuss the cost of security introduction as well as how some features typical of skeleton technology can be exploited to improve the efficiency code and data securing in a typical skeleton based parallel programming environment and we evaluate the performance cost of security mechanisms implemented exploiting state of the art tools. In particular, we take into account the cost of security introduction in muskel,
a Java based skeletal system exploiting macro data flow implementation technology. We consider the adoption of mechanisms that allow securing all the communications involving remote, unreliable nodes and we evaluate the cost of such mechanisms. Also, we consider the implications on the computational grains needed to scale secure and insecure skeletal computations.},
Author = {Marco Aldinucci and Marco Danelutto},
Date-Added = {2007-10-31 19:23:37 +0100},
Date-Modified = {2014-08-24 22:18:21 +0000},
Doi = {10.1016/j.sysarc.2008.02.008},
Journal = {Journal of Systems Architecture},
Month = sep,
Number = {9},
Pages = {868--876},
Publisher = {Elsevier},
Title = {Securing skeletal systems with limited performance penalty: the {Muskel} experience},
pdf = {http://calvados.di.unipi.it/storage/paper_files/2008_security_JSA.pdf},
Volume = {54},
Year = {2008},
Bdsk-Url-1 = {http://calvados.di.unipi.it/storage/paper_files/2008_security_JSA.pdf},
Bdsk-Url-2 = {http://dx.doi.org/10.1016/j.sysarc.2008.02.008}}

M. Aldinucci, M. Danelutto, G. Zoppi, and P. Kilpatrick, “Advances in autonomic components & services,” in From grids to service and pervasive computing (proc. of the coregrid symposium 2008), Las Palmas, Spain, 2008, pp. 3-18. doi:10.1007/978-0-387-09455-7_1
[Abstract] [BibTeX] [Download PDF]

@inproceedings{sca:cgsymph:08,
Abstract = {Hierarchical autonomic management of structured grid applications can be efficiently implemented using production rule engines. Rules of the form "precondition-to-action" can be used to model the behaviour of autonomic managers in such a way that the autonomic control and the application management strategy are kept separate. This simplifies the manager design as well as user customization of autonomic manager policies. We briefly introduce rule-based autonomic managers. Then we discuss an implementation of a GCM-like behavioural skeleton -- a composite component modelling a standard parallelism exploitation pattern with its own autonomic controller -- in SCA/Tuscany. The implementation uses the JBoss rules engine to provide an autonomic behavioural skeleton component and services to expose the component functionality to the standard service framework. Performance results are discussed and finally similarities and differences with respect to the ProActive-based reference GCM implementation are discussed briefly.},
Address = {Las Palmas, Spain},
Author = {Marco Aldinucci and Marco Danelutto and Giorgio Zoppi and Peter Kilpatrick},
Booktitle = {From Grids To Service and Pervasive Computing (Proc. of the CoreGRID Symposium 2008)},
Date-Added = {2008-05-11 18:42:40 +0200},
Date-Modified = {2012-11-17 16:11:44 +0000},
Doi = {10.1007/978-0-387-09455-7_1},
Editor = {Thierry Priol and Marco Vanneschi},
Isbn = {978-0-387-09454-0},
Month = aug,
Pages = {3--18},
Publisher = {Springer},
Series = {CoreGRID},
Title = {Advances in Autonomic Components {\&} Services},
pdf = {http://calvados.di.unipi.it/storage/paper_files/2008_SCA_cgsymph.pdf},
Year = {2008},
Bdsk-Url-1 = {http://calvados.di.unipi.it/storage/paper_files/2008_SCA_cgsymph.pdf},
Bdsk-Url-2 = {http://dx.doi.org/10.1007/978-0-387-09455-7_1}}

M. Aldinucci, S. Campa, M. Danelutto, M. Vanneschi, P. Dazzi, D. Laforenza, N. Tonellotto, and P. Kilpatrick, “Behavioural skeletons in GCM: autonomic management of grid components,” in Proc. of intl. euromicro pdp 2008: parallel distributed and network-based processing, Toulouse, France, 2008, pp. 54-63. doi:10.1109/PDP.2008.46
[Abstract] [BibTeX] [Download PDF]

@inproceedings{orc:pdp:08,
Abstract = {Autonomic management can be used to improve the QoS provided by parallel/distributed applications. We discuss behavioural skeletons introduced in earlier work: rather than relying on programmer ability to design "from scratch" efficient autonomic policies, we encapsulate general autonomic controller features into algorithmic skeletons. Then we leave to the programmer the duty of specifying the parameters needed to specialise the skeletons to the needs of the particular application at hand. This results in the programmer having the ability to fast prototype and tune distributed/parallel applications with non-trivial autonomic management capabilities. We discuss how behavioural skeletons have been implemented in the framework of GCM (the grid component model developed within the CoreGRID NoE and currently being implemented within the GridCOMP STREP project). We present results evaluating the overhead introduced by autonomic management activities as well as the overall behaviour of the skeletons. We also present results achieved with a long running application subject to autonomic management and dynamically adapting to changing features of the target architecture. Overall the results demonstrate both the feasibility of implementing autonomic control via behavioural skeletons and the effectiveness of our sample behavioural skeletons in managing the "functional replication" pattern(s).},
Address = {Toulouse, France},
Author = {Marco Aldinucci and Sonia Campa and Marco Danelutto and Marco Vanneschi and Patrizio Dazzi and Domenico Laforenza and Nicola Tonellotto and Peter Kilpatrick},
Booktitle = {Proc. of Intl. Euromicro PDP 2008: Parallel Distributed and network-based Processing},
Date-Added = {2007-10-09 12:13:13 +0200},
Date-Modified = {2009-02-05 23:55:55 +0100},
Doi = {10.1109/PDP.2008.46},
Editor = {Didier El Baz and Julien Bourgeois and Francois Spies},
Month = feb,
Pages = {54--63},
Publisher = {IEEE},
Title = {Behavioural skeletons in {GCM}: autonomic management of grid components},
pdf = {http://calvados.di.unipi.it/storage/paper_files/2008_orc_PDP.pdf},
Year = {2008},
Bdsk-Url-1 = {http://calvados.di.unipi.it/storage/paper_files/2008_orc_PDP.pdf},
Bdsk-Url-2 = {http://dx.doi.org/10.1109/PDP.2008.46}}

M. Aldinucci, M. Danelutto, and P. Kilpatrick, “A framework for prototyping and reasoning about grid systems,” in Parallel computing: architectures, algorithms and applications, Germany, 2008, pp. 235-242.
[Abstract] [BibTeX] [Download PDF]

@inproceedings{orc:parco:07,
Abstract = {A framework supporting fast prototyping as well as tuning of distributed applications is presented. The approach is based on the adoption of a formal model that is used to describe the orchestration of distributed applications. The formal model (Orc by Misra and Cook) can be used to support semi-formal reasoning about the applications at hand. The paper describes how the framework can be used to derive and evaluate alternative orchestrations of a well know parallel/distributed computation pattern; and shows how the same formal model can be used to support generation of prototypes of distributed applications skeletons directly from the application description.},
Address = {Germany},
Annote = {Parco 2007},
Author = {Marco Aldinucci and Marco Danelutto and Peter Kilpatrick},
Booktitle = {Parallel Computing: Architectures, Algorithms and Applications},
Date-Added = {2007-06-26 01:48:06 +0200},
Date-Modified = {2012-11-18 17:48:22 +0000},
Editor = {C. Bischof and M. B{\"u}cker and P. Gibbon and G. R. Joubert and T. Lippert and B. Mohr and F. J. Peters},
Isbn = {9781586037963},
Pages = {235--242},
Publisher = {IOS press},
Series = {ADVANCES IN PARALLEL COMPUTING},
Title = {A framework for prototyping and reasoning about grid systems},
pdf = {http://calvados.di.unipi.it/storage/paper_files/2007_orc_parco.pdf},
Volume = {15},
Year = {2008},
Bdsk-Url-1 = {http://calvados.di.unipi.it/storage/paper_files/2007_orc_parco.pdf}}

M. Aldinucci, M. Danelutto, P. Kilpatrick, and P. Dazzi, “From Orc models to distributed grid Java code,” in Proc. of the integrated research in grid computing workshop, Hersonissos, Crete, Greece, 2008, pp. 2-13.
[Abstract] [BibTeX] [Download PDF]

@inproceedings{orc:IW:08,
Abstract = {We present O2J, a Java library that allows implementation of Orc programs on distributed architectures including grids and clusters/networks of workstations. With minimal programming effort the grid programmer may implement Orc programs, as he/she is not required to write any low level code relating to distributed orchestration of the computation but only that required to implement Orc expressions. Using the prototype O2J implementation, grid application developers can reason about abstract grid orchestration code described in Orc. Once the required orchestration has been determined and its properties analysed, a grid application prototype can be simply, efficiently and quickly implemented by taking the Orc code, rewriting it into corresponding Java/O2J syntax and finally providing the functional code implementing the sites and processes involved. The proposed modus operandi brings a Model Driven Engineering approach to grid application development.},
Address = {Hersonissos, Crete, Greece},
Author = {Marco Aldinucci and Marco Danelutto and Peter Kilpatrick and Patrizio Dazzi},
Booktitle = {Proc. of the Integrated Research in Grid Computing Workshop},
Date-Added = {2008-02-09 16:59:20 +0100},
Date-Modified = {2012-11-18 18:07:06 +0000},
Editor = {Sergei Gorlatch and Paraskevi Fragopoulou and Thierry Priol},
Keywords = {Duplicate},
Month = apr,
Pages = {2--13},
Series = {CoreGRID},
Title = {From {Orc} Models to Distributed Grid {Java} code},
pdf = {http://calvados.di.unipi.it/storage/paper_files/2008_IW_O2J.pdf},
Year = {2008},
Bdsk-Url-1 = {http://calvados.di.unipi.it/storage/paper_files/2008_IW_O2J.pdf}}

M. Aldinucci, M. Danelutto, P. Kilpatrick, and P. Dazzi, “From Orc models to distributed grid Java code,” in Grid computing: achievements and prospects, S. Gorlatch, P. Fragopoulou, and T. Priol, Eds., Springer, 2008, pp. 13-24. doi:10.1007/978-0-387-09457-1_2
[Abstract] [BibTeX] [Download PDF]

@incollection{orc:IW_book:08,
Abstract = {We present O2J, a Java library that allows implementation of Orc programs on distributed architectures including grids and clusters/networks of workstations. With minimal programming effort the grid programmer may implement Orc programs, as he/she is not required to write any low level code relating to distributed orchestration of the computation but only that required to implement Orc expressions. Using the prototype O2J implementation, grid application developers can reason about abstract grid orchestration code described in Orc. Once the required orchestration has been determined and its properties analysed, a grid application prototype can be simply, efficiently and quickly implemented by taking the Orc code, rewriting it into corresponding Java/O2J syntax and finally providing the functional code implementing the sites and processes involved. The proposed modus operandi brings a Model Driven Engineering approach to grid application development.},
Author = {Marco Aldinucci and Marco Danelutto and Peter Kilpatrick and Patrizio Dazzi},
Booktitle = {Grid Computing: Achievements and Prospects},
Date-Added = {2008-11-16 16:26:47 +0100},
Date-Modified = {2015-02-21 14:30:35 +0000},
Doi = {10.1007/978-0-387-09457-1_2},
Editor = {Sergei Gorlatch and Paraskevi Fragopoulou and Thierry Priol},
Isbn = {978-0-387-09456-4},
Pages = {13--24},
Publisher = {Springer},
Series = {CoreGRID},
Title = {From {Orc} Models to Distributed Grid {Java} code},
pdf = {http://calvados.di.unipi.it/storage/paper_files/2008_IW_book_O2J.pdf},
Year = {2008},
Bdsk-Url-1 = {http://calvados.di.unipi.it/storage/paper_files/2008_IW_book_O2J.pdf},
Bdsk-Url-2 = {http://dx.doi.org/10.1007/978-0-387-09457-1},
Bdsk-Url-3 = {http://dx.doi.org/10.1007/978-0-387-09457-1_2}}

M. Aldinucci, M. Danelutto, H. L. Bouziane, and C. Pérez, “Towards a Spatio-Temporal sKeleton Model implementation on top of SCA,” Institute on Programming Model, CoreGRID – Network of Excellence, TR-0171, 2008.
[BibTeX] [Download PDF]

@techreport{coregrid:tr0171,
Author = {Aldinucci, Marco and Danelutto, Marco and Bouziane, Hinde Lilia and P{\'e}rez, Christian},
Date-Added = {2009-09-08 16:12:10 +0200},
Date-Modified = {2014-06-21 22:18:57 +0000},
Institution = {Institute on Programming Model, CoreGRID - Network of Excellence},
Month = sep,
Number = {TR-0171},
Title = {Towards a {Spatio-Temporal} {sKeleton} {Model} implementation on top of {SCA}},
pdf = {http://calvados.di.unipi.it/storage/paper_files/tr-0171.pdf},
Year = {2008},
Bdsk-Url-1 = {http://calvados.di.unipi.it/storage/paper_files/tr-0171.pdf}}

M. Aldinucci, M. Danelutto, H. L. Bouziane, and C. Pérez, “Towards software component assembly language enhanced with workflows and skeletons,” Institute on Programming Model, CoreGRID – Network of Excellence, TR-0153, 2008.
[BibTeX] [Download PDF]

@techreport{coregrid:tr0153,
  author      = {Aldinucci, Marco and Danelutto, Marco and Bouziane, Hinde Lilia and P{\'e}rez, Christian},
  title       = {Towards Software Component Assembly Language Enhanced with Workflows and Skeletons},
  institution = {Institute on Programming Model, CoreGRID - Network of Excellence},
  number      = {TR-0153},
  month       = jul,
  year        = {2008},
  pdf         = {http://www.coregrid.net/mambo/images/stories/TechnicalReports/tr-0153.pdf},
  bdsk-url-1  = {http://www.coregrid.net/mambo/images/stories/TechnicalReports/tr-0153.pdf}
}

M. Aldinucci, S. Campa, M. Coppola, M. Danelutto, G. Zoppi, A. Basso, A. Bolotov, F. Baude, H. L. Bouziane, D. Caromel, L. Henrio, C. Pérez, J. Cunha, M. Classen, P. Classen, C. Lengauer, J. Cohen, S. Mc Gough, N. Currle-Linde, P. Dazzi, N. Tonellotto, J. Dünnweber, S. Gorlatch, P. Kilpatrick, N. Ranaldo, and E. Zimeo, “Proceedings of the programming model institute technical meeting 2008,” Institute of Programming Model, CoreGRID – Network of Excellence, TR-0138, 2008.
[BibTeX] [Download PDF]

@techreport{coregrid:tr0138,
Author = {Aldinucci, Marco and Campa, Sonia and Coppola, Massimo and Danelutto, Marco and Zoppi, G. and Basso, Alessandro and Bolotov, Alexander and Baude, Francoise and Bouziane, Hinde Lilia and Caromel, Denis and Henrio, Ludovic and P{\'e}rez, Christian and Cunha, Jose and Classen, Michael and Classen, Philipp and Lengauer, Christian and Cohen, J. and Mc Gough, S. and Currle-Linde, Natalia and Dazzi, Patrizio and Tonellotto, Nicola and D{\"u}nnweber, Jan and Gorlatch, Sergei and Kilpatrick, Peter and Ranaldo, Nadia and Zimeo, Eugenio},
Institution = {Institute of Programming Model, CoreGRID - Network of Excellence},
Month = may,
Number = {TR-0138},
Title = {Proceedings of the Programming Model Institute Technical meeting 2008},
pdf = {http://www.coregrid.net/mambo/images/stories/TechnicalReports/tr-0138.pdf},
Year = {2008},
Bdsk-Url-1 = {http://www.coregrid.net/mambo/images/stories/TechnicalReports/tr-0138.pdf}}

M. Aldinucci, M. Danelutto, and P. Kilpatrick, “Hierarchical autonomic management: a case study with skeletal systems,” Institute on Programming Model, CoreGRID – Network of Excellence, TR-0127, 2008.
[BibTeX] [Download PDF]

@techreport{coregrid:tr0127,
  author        = {Marco Aldinucci and Marco Danelutto and Peter Kilpatrick},
  title         = {Hierarchical autonomic management: a case study with skeletal systems},
  institution   = {Institute on Programming Model, CoreGRID - Network of Excellence},
  number        = {TR-0127},
  month         = feb,
  year          = {2008},
  pdf           = {http://calvados.di.unipi.it/storage/paper_files/tr-0127.pdf},
  date-added    = {2008-05-15 23:44:59 +0200},
  date-modified = {2014-06-21 22:13:51 +0000},
  bdsk-url-1    = {http://calvados.di.unipi.it/storage/paper_files/tr-0127.pdf}
}

C. Bertolli, R. Fantacci, G. Mencagli, D. Tarchi, and M. Vanneschi, “Next generation grids and wireless communication networks: towards a novel integrated approach,” Wireless communications and mobile computing, 2008.
[BibTeX]

@article{position:insyeme:09,
  author        = {Carlo Bertolli and Romano Fantacci and Gabriele Mencagli and Daniele Tarchi and Marco Vanneschi},
  title         = {Next generation grids and wireless communication networks: towards a novel integrated approach},
  journal       = {Wireless Communications and Mobile Computing},
  year          = {2008},
  note          = {To appear},
  date-added    = {2008-10-15 23:08:23 +0200},
  date-modified = {2008-11-16 18:18:07 +0100}
}

M. Danelutto and G. Zoppi, “Behavioural skeletons meeting services,” in Proc. of iccs: intl. conference on computational science, workshop on practical aspects of high-level parallel programming, Krakow, Poland, 2008, pp. 146-153. doi:10.1007/978-3-540-69384-0
[BibTeX] [URL]

@inproceedings{zoppi:sca:papp:2008,
Address = {Krak{\'o}w, Poland},
Author = {Marco Danelutto and Giorgio Zoppi},
Booktitle = {Proc. of ICCS: Intl. Conference on Computational Science, Workshop on Practical Aspects of High-level Parallel Programming},
Date-Added = {2008-04-22 18:50:12 +0200},
Date-Modified = {2009-01-25 23:36:00 +0100},
Doi = {10.1007/978-3-540-69384-0},
Month = jun,
Pages = {146--153},
Publisher = {Springer},
Series = {LNCS},
Title = {Behavioural skeletons meeting Services},
Url = {http://www.springerlink.com/content/m186g31118144078/},
Volume = {5101},
Year = {2008},
Bdsk-Url-1 = {http://www.springerlink.com/content/m186g31118144078/},
Bdsk-Url-2 = {http://dx.doi.org/10.1007/978-3-540-69384-0}}

A. Oprescu, T. Kielmann, M. Danelutto, and M. Aldinucci, “Autonomic behavior of grid applications using component platforms,” Institute on Programming Model, CoreGRID – Network of Excellence, TR-0156, 2008.
[BibTeX] [Download PDF]

@techreport{coregrid:tr0156,
Author = {Ana-Maria Oprescu and Thilo Kielmann and Marco Danelutto and Marco Aldinucci},
Date-Modified = {2014-06-21 15:24:31 +0000},
Institution = {Institute on Programming Model, CoreGRID - Network of Excellence},
Month = jul,
Number = {TR-0156},
Title = {Autonomic Behavior of Grid Applications using Component Platforms},
pdf = {http://www.coregrid.net/mambo/images/stories/TechnicalReports/tr-0156.pdf},
Year = {2008},
Bdsk-Url-1 = {http://www.coregrid.net/mambo/images/stories/TechnicalReports/tr-0156.pdf}}

2007

M. Aldinucci, M. Danelutto, and P. Dazzi, “Muskel: an expandable skeleton environment,” Scalable computing: practice and experience, vol. 8, iss. 4, pp. 325-341, 2007.
[Abstract] [BibTeX] [URL] [Download PDF]

@article{muskel:SCPE:07,
Abstract = {Programming models based on algorithmic skeletons promise to raise the level of abstraction perceived by programmers when implementing parallel applications, while guaranteeing good performance figures. At the same time, however, they restrict the freedom of programmers to implement arbitrary parallelism exploitation patterns. In fact, efficiency is achieved by restricting the parallelism exploitation patterns provided to the programmer to the useful ones for which efficient implementations, as well as useful and efficient compositions, are known. In this work we introduce muskel, a full Java library targeting workstation clusters, networks and grids and providing the programmers with a skeleton based parallel programming environment. muskel is implemented exploiting (macro) data flow technology, rather than the more usual skeleton technology relying on the use of implementation templates. Using data flow, muskel easily and efficiently implements both classical, predefined skeletons, and
user-defined parallelism exploitation patterns. This provides a means to overcome some of the problems that Cole identified in his skeleton ``manifesto'' as the issues impairing skeleton success in the parallel programming arena. We discuss fully how user-defined skeletons are supported by exploiting a data flow implementation, experimental results and we also discuss extensions supporting the further characterization of skeletons with non-functional properties, such as security, through the use of Aspect Oriented Programming and annotations.},
Author = {Marco Aldinucci and Marco Danelutto and Patrizio Dazzi},
Date-Added = {2007-06-26 01:27:03 +0200},
Date-Modified = {2014-08-24 22:17:35 +0000},
Journal = {Scalable Computing: Practice and Experience},
Month = dec,
Number = {4},
Pages = {325--341},
Title = {MUSKEL: an expandable skeleton environment},
Url = {http://www.scpe.org/index.php/scpe/article/view/429},
Volume = {8},
Year = {2007},
pdf = {http://www.scpe.org/vols/vol08/no4/SCPE_8_4_01.pdf},
Bdsk-Url-2 = {http://calvados.di.unipi.it/storage/paper_files/2007_SCPE_muskel.pdf},
Bdsk-Url-3 = {http://www.scpe.org/index.php/scpe/article/view/429}}

M. Aldinucci and M. Danelutto, “The cost of security in skeletal systems,” in Proc. of intl. euromicro pdp 2007: parallel distributed and network-based processing, Napoli, Italia, 2007, pp. 213-220. doi:10.1109/PDP.2007.79
[Abstract] [BibTeX] [Download PDF]

@inproceedings{security:euromicro:07,
Abstract = {Skeletal systems exploit algorithmical skeletons technology to provide the user very high level, efficient parallel programming environments. They have been recently demonstrated to be suitable for highly distributed architectures, such as workstation clusters, networks and grids. However, when using skeletal system for grid programming care must be taken to secure data and code transfers across non-dedicated, non-secure network links. In this work we take into account the cost of security introduction in muskel, a Java based skeletal system exploiting macro data flow implementation technology. We consider the adoption of mechanisms that allow securing all the communications taking place between remote, unreliable nodes and we evaluate the cost of such mechanisms. In particular, we consider the implications on the computational grains needed to scale secure and insecure skeletal computations.},
Address = {Napoli, Italia},
Author = {Marco Aldinucci and Marco Danelutto},
Booktitle = {Proc. of Intl. Euromicro PDP 2007: Parallel Distributed and network-based Processing},
Date-Added = {2007-03-08 15:44:26 +0100},
Date-Modified = {2008-02-18 12:49:23 +0100},
Doi = {10.1109/PDP.2007.79},
Editor = {Pasqua D'Ambra and Mario Rosario Guarracino},
Month = feb,
Pages = {213--220},
Publisher = {IEEE},
Title = {The cost of security in skeletal systems},
pdf = {http://calvados.di.unipi.it/storage/paper_files/2007_security_PDP.pdf},
Year = {2007},
Bdsk-Url-1 = {http://calvados.di.unipi.it/storage/paper_files/2007_security_PDP.pdf},
Bdsk-Url-2 = {http://dx.doi.org/10.1109/PDP.2007.79}}

M. Aldinucci, S. Campa, M. Danelutto, P. Dazzi, P. Kilpatrick, D. Laforenza, and N. Tonellotto, “Behavioural skeletons for component autonomic management on grids,” in Coregrid workshop on grid programming model, grid and p2p systems architecture, grid systems, tools and environments, Heraklion, Crete, Greece, 2007.
[Abstract] [BibTeX] [Download PDF]

@inproceedings{beske:cg:heraklion:07,
Abstract = {We present behavioural skeletons for the CoreGRID Component Model, which are an abstraction aimed at simplifying the development of GCM-based self-management applications. Behavioural skeletons abstract component self-managent in component-based design as design patterns abstract class design in classic OO development. As here we just wish to introduce the behavioural skeleton framework, emphasis is placed on general skeleton structure rather than on their autonomic management policies.},
Address = {Heraklion, Crete, Greece},
Author = {Marco Aldinucci and Sonia Campa and Marco Danelutto and Patrizio Dazzi and Peter Kilpatrick and Domenico Laforenza and Nicola Tonellotto},
Booktitle = {CoreGRID Workshop on Grid Programming Model, Grid and P2P Systems Architecture, Grid Systems, Tools and Environments},
Date-Added = {2007-06-26 01:50:37 +0200},
Date-Modified = {2007-12-16 23:32:27 +0100},
Month = jun,
Title = {Behavioural skeletons for component autonomic management on grids},
pdf = {http://compass2.di.unipi.it/TR/Files/TR-07-12.pdf.gz},
Year = {2007},
Bdsk-Url-1 = {http://compass2.di.unipi.it/TR/Files/TR-07-12.pdf.gz}}

M. Aldinucci, S. Campa, M. Danelutto, P. Kilpatrick, P. Dazzi, D. Laforenza, and N. Tonellotto, “Behavioural skeletons for component autonomic management on grids,” Università di Pisa, Dipartimento di Informatica, TR-07-12, 2007. doi:10.1007/978-0-387-78448-9_1
[BibTeX] [Download PDF]

@techreport{beske:TR-07-12,
Author = {Marco Aldinucci and Sonia Campa and Marco Danelutto and Peter Kilpatrick and Patrizio Dazzi and Domenico Laforenza and Nicola Tonellotto},
Date-Added = {2007-10-15 20:03:59 +0200},
Date-Modified = {2007-10-15 20:04:09 +0200},
Doi = {10.1007/978-0-387-78448-9_1},
Institution = {Universit{\`a} di Pisa, Dipartimento di Informatica},
Month = may,
Number = {TR-07-12},
Title = {Behavioural skeletons for component autonomic management on grids},
pdf = {http://compass2.di.unipi.it/TR/Files/TR-07-12.pdf.gz},
Year = {2007},
Bdsk-Url-1 = {http://compass2.di.unipi.it/TR/Files/TR-07-12.pdf.gz},
Bdsk-Url-2 = {http://dx.doi.org/10.1007/978-0-387-78448-9_1}}

M. Aldinucci, M. Torquati, and P. Zuccato, “VirtuaLinux website,” 2007.
[BibTeX]

@manual{virtualinux-web,
Author = {Marco Aldinucci and Massimo Torquati and Pierfrancesco Zuccato},
Note = {\url{http://virtualinux.sourceforge.net/}},
Title = {{VirtuaLinux} website},
Year = {2007}}

M. Aldinucci, M. Torquati, M. Vanneschi, M. Cacitti, A. Gervaso, and P. Zuccato, “VirtuaLinux design principles,” Università di Pisa, Dipartimento di Informatica, Italy, TR-07-13, 2007.
[BibTeX] [Download PDF]

@techreport{virtualinux:tr:07,
  author        = {Marco Aldinucci and Massimo Torquati and Marco Vanneschi and Manuel Cacitti and Alessandro Gervaso and Pierfrancesco Zuccato},
  title         = {{VirtuaLinux} Design Principles},
  institution   = {Universit{\`a} di Pisa, Dipartimento di Informatica, Italy},
  number        = {TR-07-13},
  month         = jun,
  year          = {2007},
  pdf           = {http://calvados.di.unipi.it/storage/paper_files/2007_VirtuaLinux_TR-07-13.pdf},
  date-added    = {2007-07-30 20:59:58 +0200},
  date-modified = {2007-09-16 18:47:11 +0200},
  bdsk-url-1    = {http://calvados.di.unipi.it/storage/paper_files/2007_VirtuaLinux_TR-07-13.pdf}
}

M. Aldinucci and M. Danelutto, “Skeleton based parallel programming: functional and parallel semantic in a single shot,” Computer languages, systems and structures, vol. 33, iss. 3-4, pp. 179-192, 2007. doi:10.1016/j.cl.2006.07.004
[Abstract] [BibTeX] [Download PDF]

@article{lithium:sem:CLSS,
Abstract = {Semantics of skeleton-based parallel programming languages comes usually as two distinct items: a functional semantics, modeling the function computed by the skeleton program, and a parallel semantics describing the ways used to exploit parallelism during the execution of the skeleton program. The former is usually expressed using some kind of semantic formalism, while the latter is almost always given in an informal way. Such a separation of functional and parallel semantics seriously impairs the possibility of programmers to use the semantic tools to prove properties of programs. In this work, we show how a formal semantic framework can be set up that handles both functional and parallel aspects of skeleton-based parallel programs. The framework is based on a labeled transition system. We show how different properties related to skeleton programs can be proved using such a system. We use Lithium, a skeleton-based full Java parallel programming environment, as the case study.},
Annote = {ISSN: 1477-8424},
Author = {Marco Aldinucci and Marco Danelutto},
Date-Modified = {2014-08-24 22:17:22 +0000},
Doi = {10.1016/j.cl.2006.07.004},
Journal = {Computer Languages, Systems and Structures},
Month = oct,
Number = {3-4},
Pages = {179--192},
Title = {Skeleton based parallel programming: functional and parallel semantic in a single shot},
pdf = {http://calvados.di.unipi.it/storage/paper_files/2005_semantics_CLSS.pdf},
Volume = {33},
Year = {2007},
Bdsk-Url-1 = {http://calvados.di.unipi.it/storage/paper_files/2005_semantics_CLSS.pdf},
Bdsk-Url-2 = {http://dx.doi.org/10.1016/j.cl.2006.07.004}}

M. Aldinucci, S. Campa, M. Coppola, M. Danelutto, C. Zoccolo, F. André, and J. Buisson, “An abstract schema modeling adaptivity management,” in Integrated research in grid computing, S. Gorlatch and M. Danelutto, Eds., Springer, 2007, pp. 89-102. doi:10.1007/978-0-387-47658-2_7
[Abstract] [BibTeX] [Download PDF]

@incollection{adapt_rennes:IW_book:06,
Abstract = {Nowadays, component application adaptivity in Grid environments has been afforded in different ways, such those provided by the Dynaco/AFPAC framework and by the ASSIST environment. We propose an abstract schema that catches all the designing aspects a model for parallel component applications on Grid should define in order to uniformly handle the dynamic behavior of computing resources within complex parallel applications. The abstraction is validated by demonstrating how two different approaches to adaptivity, ASSIST and Dynaco/AFPAC, easily map to such schema.},
Annote = {ISBN: 0-387-47656-3},
Author = {Marco Aldinucci and Sonia Campa and Massimo Coppola and Marco Danelutto and Corrado Zoccolo and Francoise Andr{\'e} and J{\'e}r{\'e}my Buisson},
Booktitle = {Integrated Research in Grid Computing},
Date-Modified = {2012-03-18 00:36:49 +0000},
Doi = {10.1007/978-0-387-47658-2_7},
Editor = {Sergei Gorlatch and Marco Danelutto},
Isbn = {978-0-387-47656-8},
Owner = {aldinuc},
Pages = {89--102},
Publisher = {Springer},
Series = {CoreGRID},
Timestamp = {2006.06.28},
Title = {An abstract schema modeling adaptivity management},
pdf = {http://calvados.di.unipi.it/storage/paper_files/2006_IW_book_adapt.pdf},
Year = {2007},
Bdsk-Url-1 = {http://calvados.di.unipi.it/storage/paper_files/2006_IW_book_adapt.pdf},
Bdsk-Url-2 = {http://dx.doi.org/10.1007/978-0-387-47658-2_7}}

M. Aldinucci, M. Danelutto, and P. Kilpatrick, “Management in distributed systems: a semi-formal approach,” Università di Pisa, Dipartimento di Informatica, TR-07-05, 2007. doi:10.1007/978-3-540-74466-5_69
[BibTeX] [Download PDF]

@techreport{orc_muskel:TR-07-05,
  author        = {Marco Aldinucci and Marco Danelutto and Peter Kilpatrick},
  title         = {Management in distributed systems: a semi-formal approach},
  institution   = {Universit{\`a} di Pisa, Dipartimento di Informatica},
  number        = {TR-07-05},
  month         = feb,
  year          = {2007},
  doi           = {10.1007/978-3-540-74466-5_69},
  pdf           = {http://compass2.di.unipi.it/TR/Files/TR-07-05.pdf.gz},
  date-modified = {2007-10-15 20:03:29 +0200},
  bdsk-url-1    = {http://compass2.di.unipi.it/TR/Files/TR-07-05.pdf.gz},
  bdsk-url-2    = {http://dx.doi.org/10.1007/978-3-540-74466-5_69}
}

M. Aldinucci, M. Danelutto, and P. Kilpatrick, “Management in distributed systems: a semi-formal approach,” in Proc. of 13th intl. euro-par 2007 parallel processing, Rennes, France, 2007, pp. 651-661. doi:10.1007/978-3-540-74466-5_69
[Abstract] [BibTeX] [Download PDF]

@inproceedings{orc:europar:07,
Abstract = {The reverse engineering of a skeleton based programming environment and redesign to distribute management activities of the system and thereby remove a potential single point of failure is considered. The Orc notation is used to facilitate abstraction of the design and analysis of its properties. It is argued that Orc is particularly suited to this role as this type of management is essentially an orchestration activity. The Orc specification of the original version of the system is modified via a series of semi-formally justified derivation steps to obtain a specification of the decentralized management version which is then used as a basis for its implementation. Analysis of the two specifications allows qualitative prediction of the expected performance of the derived version with respect to the original, and this prediction is borne out in practice.},
Address = {Rennes, France},
Author = {Marco Aldinucci and Marco Danelutto and Peter Kilpatrick},
Booktitle = {Proc. of 13th Intl. Euro-Par 2007 Parallel Processing},
Date-Added = {2009-05-01 23:33:34 +0200},
Date-Modified = {2009-05-01 23:33:34 +0200},
Doi = {10.1007/978-3-540-74466-5_69},
Editor = {A.-M. Kermarrec and L. Boug{\'e} and T. Priol},
Isbn = {978-3-540-74465-8},
Month = aug,
Pages = {651--661},
Publisher = {Springer},
Series = {LNCS},
Title = {Management in distributed systems: a semi-formal approach},
pdf = {http://calvados.di.unipi.it/storage/paper_files/2007_orc_europar.pdf},
Volume = {4641},
Year = {2007},
Bdsk-Url-1 = {http://calvados.di.unipi.it/storage/paper_files/2007_orc_europar.pdf},
Bdsk-Url-2 = {http://dx.doi.org/10.1007/978-3-540-74466-5_69}}

M. Aldinucci, M. Danelutto, and P. Kilpatrick, “Adding metadata to orc to support reasoning about grid programming,” in Towards next generation grids (proc. of the coregrid symposium 2007), Rennes, France, 2007, pp. 205-214. doi:10.1007/978-0-387-72498-0_19
[Abstract] [BibTeX] [Download PDF]

@inproceedings{orc:metadata:cgs:07,
Abstract = {Following earlier work demonstrating the utility of Orc as a means of specifying and reasoning about grid applications we propose the enhancement of such specifications with metadata that provide a means to extend an Orc specification with implementation oriented information. We argue that such specifications provide a useful refinement step in allowing reasoning about implementation related issues ahead of actual implementation or even prototyping. As examples, we demonstrate how such extended specifications can be used for investigating security related issues and for evaluating the cost of handling grid resource faults. The approach emphasises a semi-formal style of reasoning that makes maximum use of programmer domain knowledge and experience.},
Address = {Rennes, France},
Author = {Marco Aldinucci and Marco Danelutto and Peter Kilpatrick},
Booktitle = {Towards Next Generation Grids (Proc. of the CoreGRID Symposium 2007)},
Date-Added = {2007-06-26 01:55:01 +0200},
Date-Modified = {2009-02-04 18:57:20 +0100},
Doi = {10.1007/978-0-387-72498-0_19},
Editor = {Thierry Priol and Marco Vanneschi},
Isbn = {978-0-387-72497-3},
Month = sep,
Pages = {205--214},
Publisher = {Springer},
Series = {CoreGRID},
Title = {Adding metadata to {Orc} to support reasoning about grid programming},
pdf = {http://calvados.di.unipi.it/storage/paper_files/2007_orc_CGSymph.pdf},
Year = {2007},
Bdsk-Url-1 = {http://calvados.di.unipi.it/storage/paper_files/2007_orc_CGSymph.pdf},
Bdsk-Url-2 = {http://dx.doi.org/10.1007/978-0-387-72498-0_19}}

M. Aldinucci, M. Danelutto, and P. Kilpatrick, “Orc + metadata supporting grid programming,” Università di Pisa, Dipartimento di Informatica, TR-07-10, 2007.
[BibTeX] [Download PDF]

@techreport{orcmetadata:TR-07-10,
Author = {Marco Aldinucci and Marco Danelutto and Peter Kilpatrick},
Date-Added = {2007-10-15 20:04:45 +0200},
Date-Modified = {2007-10-15 20:05:26 +0200},
Institution = {Universit{\`a} di Pisa, Dipartimento di Informatica},
Month = may,
Number = {TR-07-10},
Title = {{Orc} + metadata supporting grid programming},
pdf = {http://compass2.di.unipi.it/TR/Files/TR-07-10.pdf.gz},
Year = {2007},
Bdsk-Url-1 = {http://compass2.di.unipi.it/TR/Files/TR-07-10.pdf.gz}}

M. Coppola, D. Laforenza, N. Tonellotto, M. Danelutto, M. Vanneschi, and C. Zoccolo, “Managing user expectation with component performance contracts,” in Proc. of the workshop on usage of service level agreements in grids, Austin, TX, USA, 2007.
[BibTeX]

@inproceedings{cop:usla:07,
Address = {Austin, TX, USA},
Author = {Massimo Coppola and Domenico Laforenza and Nicola Tonellotto and Marco Danelutto and Marco Vanneschi and Corrado Zoccolo},
Booktitle = {Proc. of the Workshop on Usage of Service Level Agreements in Grids},
Date-Added = {2007-10-09 10:25:36 +0200},
Date-Modified = {2008-10-15 23:00:30 +0200},
Month = sep,
Publisher = {Springer},
Series = {CoreGRID},
Title = {Managing User Expectation with Component Performance Contracts},
Year = {2007}}

M. Coppola, M. Danelutto, N. Tonellotto, M. Vanneschi, and C. Zoccolo, “Execution support of high performance heterogeneous component-based applications on the grid,” in Proc. of euro-par 2006 workshops: parallel processing, coregrid 2006, unicore summit 2006, petascale computational biology and bioinformatics, dresden, germany, august 29-september 1, 2006, revised selected papers, 2007, pp. 171-185.
[BibTeX]

@inproceedings{DBLP:conf/europar/CoppolaDTVZ06,
Annote = {Euro-Par Workshops},
Author = {Massimo Coppola and Marco Danelutto and Nicola Tonellotto and Marco Vanneschi and Corrado Zoccolo},
Bibsource = {DBLP, http://dblp.uni-trier.de},
Booktitle = {Proc. of Euro-Par 2006 Workshops: Parallel Processing, CoreGRID 2006, UNICORE Summit 2006, Petascale Computational Biology and Bioinformatics, Dresden, Germany, August 29-September 1, 2006, Revised Selected Papers},
Doi = {10.1007/978-3-540-72337-0_16},
Editor = {Wolfgang Lehner and Norbert Meyer and Achim Streit and Craig Stewart},
Ee = {http://dx.doi.org/10.1007/978-3-540-72337-0_16},
Pages = {171--185},
Publisher = {Springer},
Series = {LNCS},
Title = {Execution Support of High Performance Heterogeneous Component-Based Applications on the Grid},
Volume = {4375},
Year = {2007}}

M. Danelutto, M. Aldinucci, and P. Kilpatrick, “Prototyping and reasoning about distributed systems: an orc based framework,” Institute on Programming Model, CoreGRID – Network of Excellence, TR-0102, 2007.
[BibTeX] [Download PDF]

@techreport{coregrid:tr0102,
Author = {Marco Danelutto and Marco Aldinucci and Peter Kilpatrick},
Date-Added = {2008-02-09 17:07:09 +0100},
Date-Modified = {2008-02-09 17:07:09 +0100},
Institution = {Institute on Programming Model, CoreGRID - Network of Excellence},
Month = aug,
Number = {TR-0102},
Title = {Prototyping and reasoning about distributed systems: an {Orc} based framework},
pdf = {http://www.coregrid.net/mambo/images/stories/TechnicalReports/tr-0102.pdf},
Year = {2007},
Bdsk-Url-1 = {http://www.coregrid.net/mambo/images/stories/TechnicalReports/tr-0102.pdf}}

M. Danelutto, M. Pasin, M. Vanneschi, P. Dazzi, L. Presti, and D. Laforenza, “Pal: exploiting java annotations for parallelism,” in Achievements in european research on grid systems, M. Bubak, S. Gorlatch, and T. Priol, Eds., Kraków, Poland: Springer, 2007, pp. 83-96.
[BibTeX]

@incollection{pal:IW_book:07,
Address = {Krak{\'o}w, Poland},
Author = {Marco Danelutto and Marcelo Pasin and Marco Vanneschi and Patrizio Dazzi and Luigi Presti and Domenico Laforenza},
Booktitle = {Achievements in European Research on Grid Systems},
Date-Modified = {2011-02-12 15:55:56 +0200},
Editor = {Marian Bubak and Sergei Gorlatch and Thierry Priol},
Month = nov,
Pages = {83--96},
Publisher = {Springer},
Read = {Yes},
Series = {CoreGRID},
Title = {{PAL}: Exploiting {Java} Annotations for Parallelism},
Year = {2007}}

C. Dittamo, A. Cisternino, and M. Danelutto, “Parallelization of C# programs through annotations,” in Proc. of practical aspects of high-level parallel programming workshop (papp, co-located with iccs 2007), Beijing, China, 2007, pp. 585-592.
[BibTeX] [Download PDF]

@inproceedings{07:dcd:csharp,
Address = {Beijing, China},
Author = {Cristian Dittamo and Antonio Cisternino and Marco Danelutto},
Booktitle = {Proc. of Practical Aspects of High-Level Parallel Programming Workshop (PAPP, co-located with ICCS 2007)},
Date-Added = {2008-02-19 16:51:28 +0100},
Date-Modified = {2008-09-14 14:13:29 +0200},
Month = may,
Pages = {585--592},
Publisher = {Springer},
Series = {LNCS},
Title = {Parallelization of {C\#} Programs Through Annotations},
pdf = {http://www.springerlink.com/content/t4r5760316743456/fulltext.pdf},
Volume = {4488},
Year = {2007},
Bdsk-Url-1 = {http://www.springerlink.com/content/t4r5760316743456/fulltext.pdf}}

J. Dünnweber, S. Gorlatch, S. Campa, M. Aldinucci, and M. Danelutto, “Adaptable parallel components for grid programming,” in Integrated research in grid computing, S. Gorlatch and M. Danelutto, Eds., Springer, 2007, pp. 43-57. doi:10.1007/978-0-387-47658-2_4
[Abstract] [BibTeX] [Download PDF]

@incollection{codeadapt:IW_book:06,
Abstract = {We suggest that parallel software components used for grid computing should be adaptable to application-specific requirements, instead of developing new components from scratch for each particular application. As an example, we take a parallel farm component which is "embarrassingly parallel", i. e. , free of dependencies, and adapt it to the wavefront processing pattern with dependencies that impact its behavior. We describe our approach in the context of Higher-Order Components (HOCs), with the Java-based system Lithium as our implementation framework. The adaptation process relies on HOCs' mobile code parameters that are shipped over the network of the grid. We describe our implementation of the proposed component adaptation method and report first experimental results for a particular grid application -- the alignment of DNA sequence pairs, a popular, time-critical problem in computational molecular biology.},
Author = {Jan D{\"u}nnweber and Sergei Gorlatch and Sonia Campa and Marco Aldinucci and Marco Danelutto},
Booktitle = {Integrated Research in Grid Computing},
Date-Modified = {2009-02-01 17:56:57 +0100},
Doi = {10.1007/978-0-387-47658-2_4},
Editor = {Sergei Gorlatch and Marco Danelutto},
Isbn = {978-0-387-47656-8},
Pages = {43--57},
Publisher = {Springer},
Series = {CoreGRID},
Timestamp = {2006.06.28},
Title = {Adaptable Parallel Components for Grid Programming},
pdf = {http://calvados.di.unipi.it/storage/paper_files/2006_IW_book_muester.pdf},
Year = {2007},
Bdsk-Url-1 = {http://calvados.di.unipi.it/storage/paper_files/2006_IW_book_muester.pdf},
Bdsk-Url-2 = {http://dx.doi.org/10.1007/978-0-387-47658-2_4}}

P. Kilpatrick, M. Danelutto, and M. Aldinucci, “Deriving grid applications from abstract models,” Institute on Programming Model, CoreGRID – Network of Excellence, TR-0085, 2007.
[BibTeX] [Download PDF]

@techreport{coregrid:tr0085,
Author = {Peter Kilpatrick and Marco Danelutto and Marco Aldinucci},
Date-Added = {2007-09-25 13:17:20 +0200},
Date-Modified = {2007-09-25 13:17:20 +0200},
Institution = {Institute on Programming Model, CoreGRID - Network of Excellence},
Month = apr,
Number = {TR-0085},
Title = {Deriving Grid Applications from Abstract Models},
pdf = {http://www.coregrid.net/mambo/images/stories/TechnicalReports/tr-0085.pdf},
Year = {2007},
Bdsk-Url-1 = {http://www.coregrid.net/mambo/images/stories/TechnicalReports/tr-0085.pdf}}

M. Pasin, P. Kuonen, M. Danelutto, and M. Aldinucci, “Skeleton parallel programming and parallel objects,” in Integrated research in grid computing, S. Gorlatch and M. Danelutto, Eds., Springer, 2007, pp. 59-71. doi:10.1007/978-0-387-47658-2_5
[Abstract] [BibTeX] [Download PDF]

@incollection{pasin:IW_book:06,
Abstract = {This paper describes the ongoing work aimed at integrating the POP-C++ parallel object programming environment with the ASSIST component based parallel programming environment. Both these programming environments are shortly outlined, then several possibilities of integration are considered. For each one of these integration opportunities, the advantages and synergies that can be possibly achieved are outlined and discussed.
The text explains how GEA, the ASSIST deployer can be considered as the basis for the integration of such different systems. An architecture is proposed, extending the existing tools to work together. The current status of integration of the two environments is discussed, along with the expected results and fallouts on the two programming environments.},
Annote = {ISBN: 978-0-387-47656-8},
Author = {Marcelo Pasin and Pierre Kuonen and Marco Danelutto and Marco Aldinucci},
Booktitle = {Integrated Research in Grid Computing},
Date-Modified = {2009-02-01 17:51:38 +0100},
Doi = {10.1007/978-0-387-47658-2_5},
Editor = {Sergei Gorlatch and Marco Danelutto},
Isbn = {978-0-387-47656-8},
Owner = {aldinuc},
Pages = {59--71},
Publisher = {Springer},
Series = {CoreGRID},
Timestamp = {2006.06.28},
Title = {Skeleton Parallel Programming and Parallel Objects},
pdf = {http://calvados.di.unipi.it/storage/paper_files/2006_IW_book_popc.pdf},
Year = {2007},
Bdsk-Url-1 = {http://calvados.di.unipi.it/storage/paper_files/2006_IW_book_popc.pdf},
Bdsk-Url-2 = {http://dx.doi.org/10.1007/978-0-387-47658-2_5}}

N. Tonellotto, D. Laforenza, M. Danelutto, M. Vanneschi, and C. Zoccolo, “A performance model for stream-based computations,” in Proc. of intl. euromicro pdp 2007: parallel distributed and network-based processing, Napoli, Italia, 2007, pp. 91-96.
[BibTeX]

@inproceedings{zoccolo:streammodel:euromicro:07,
Address = {Napoli, Italia},
Author = {Nicola Tonellotto and Domenico Laforenza and Marco Danelutto and Marco Vanneschi and Corrado Zoccolo},
Booktitle = {Proc. of Intl. Euromicro PDP 2007: Parallel Distributed and network-based Processing},
Date-Modified = {2007-03-08 15:47:19 +0100},
Editor = {Pasqua D'Ambra and Mario Rosario Guarracino},
Month = feb,
Pages = {91--96},
Publisher = {IEEE},
Title = {A Performance Model for Stream-Based Computations},
Year = {2007}}

2006

M. Aldinucci, F. André, J. Buisson, S. Campa, M. Coppola, M. Danelutto, and C. Zoccolo, “Parallel program/component adaptivity management,” in Parallel computing: current & future issues of high-end computing (proc. of PARCO 2005, malaga, spain), Germany, 2006, pp. 89-96.
[Abstract] [BibTeX] [Download PDF]

@inproceedings{adaptivity:parco:05,
Abstract = {Grid computing platforms require to handle dynamic behaviour of computing resources within complex parallel applications. We introduce a formalization of adaptive behaviour that separates the abstract model of the application from the implementation design. We exemplify the abstract adaptation schema on two applications, and we show how two quite different approaches to adaptivity, the ASSIST environment and the AFPAC framework, easily map to this common schema.},
Address = {Germany},
Author = {Marco Aldinucci and Francoise Andr{\'e} and J{\'e}r{\'e}my Buisson and Sonia Campa and Massimo Coppola and Marco Danelutto and Corrado Zoccolo},
Booktitle = {Parallel Computing: Current \& Future Issues of High-End Computing (Proc. of {PARCO 2005}, Malaga, Spain)},
Date-Modified = {2012-11-18 17:08:30 +0000},
Editor = {G. R. Joubert and W. E. Nagel and F. J. Peters and O. Plata and P. Tirado and E. Zapata},
Month = dec,
Optannote = {ISBN: 3-00-017352-8},
Pages = {89--96},
Publisher = {John von Neumann Institute for Computing},
Series = {NIC},
Title = {Parallel program/component adaptivity management},
pdf = {http://calvados.di.unipi.it/storage/paper_files/2005_adaptivity_parco.pdf},
Volume = {33},
Year = {2006},
Bdsk-Url-1 = {http://calvados.di.unipi.it/storage/paper_files/2005_adaptivity_parco.pdf}}

M. Aldinucci, M. Danelutto, G. Giaccherini, M. Torquati, and M. Vanneschi, “Towards a distributed scalable data service for the grid,” in Parallel computing: current & future issues of high-end computing (proc. of PARCO 2005, malaga, spain), Germany, 2006, pp. 73-80.
[Abstract] [BibTeX] [Download PDF]

@inproceedings{adhoc:parco:05,
Abstract = {ADHOC (Adaptive Distributed Herd of Object Caches) is a Grid-enabled, fast, scalable object repository providing programmers with a general storage module. We present three different software tools based on ADHOC: A parallel cache for Apache, a DSM, and a main memory parallel file system. We also show that these tools exhibit a considerable performance and speedup both in absolute figures and w.r.t. other software tools exploiting the same features.},
Address = {Germany},
Author = {Marco Aldinucci and Marco Danelutto and Gianni Giaccherini and Massimo Torquati and Marco Vanneschi},
Booktitle = {Parallel Computing: Current \& Future Issues of High-End Computing (Proc. of {PARCO 2005}, Malaga, Spain)},
Date-Modified = {2012-11-18 17:07:26 +0000},
Editor = {G. R. Joubert and W. E. Nagel and F. J. Peters and O. Plata and P. Tirado and E. Zapata},
Month = dec,
Optannote = {ISBN: 3-00-017352-8},
Pages = {73--80},
Publisher = {John von Neumann Institute for Computing},
Series = {NIC},
Title = {Towards a distributed scalable data service for the grid},
pdf = {http://calvados.di.unipi.it/storage/paper_files/2005_adhoc_parco.pdf},
Volume = {33},
Year = {2006},
Bdsk-Url-1 = {http://calvados.di.unipi.it/storage/paper_files/2005_adhoc_parco.pdf}}

M. Aldinucci and M. Danelutto, “Algorithmic skeletons meeting grids,” Parallel computing, vol. 32, iss. 7, pp. 449-462, 2006. doi:10.1016/j.parco.2006.04.001
[Abstract] [BibTeX] [Download PDF]

@article{advske:pc:06,
Abstract = {In this work, we discuss an extension of the set of principles that should guide the future design and development of skeletal programming systems, as defined by Cole in his "pragmatic manifesto'" paper. The three further principles introduced are related to the ability to exploit existing sequential code as well as to the ability to target typical modern architectures, those made out of heterogeneous processing elements with dynamically varying availability, processing power and connectivity features such as grids or heterogeneous, non-dedicated clusters. We outline two skeleton based programming environments currently developed at our university and we discuss how these environments adhere to the proposed set of principles. Eventually, we outline how some other relevant, well-known skeleton environments conform to the same set of principles.},
Author = {Marco Aldinucci and Marco Danelutto},
Date-Modified = {2008-02-07 03:38:19 +0100},
Doi = {10.1016/j.parco.2006.04.001},
Journal = {Parallel Computing},
Number = {7},
Pages = {449--462},
Title = {Algorithmic skeletons meeting grids},
pdf = {http://calvados.di.unipi.it/storage/paper_files/2006_advske_PC.pdf},
Volume = {32},
Year = {2006},
Bdsk-Url-1 = {http://calvados.di.unipi.it/storage/paper_files/2006_advske_PC.pdf},
Bdsk-Url-2 = {http://dx.doi.org/10.1016/j.parco.2006.04.001}}

M. Aldinucci, M. Coppola, M. Danelutto, N. Tonellotto, M. Vanneschi, and C. Zoccolo, “High level grid programming with ASSIST,” Computational methods in science and technology, vol. 12, iss. 1, pp. 21-32, 2006.
[Abstract] [BibTeX] [Download PDF]

@article{assist:CMST:06,
Abstract = {The development of efficient Grid applications usually requires writing huge portions of code directly at the level of abstraction provided by the underlying Grid middleware. In this work we discuss an alternative approach, raising the level of abstraction used when programming Grid applications. Our approach requires programmers just to describe in a qualitative way the kind of parallelism they want to express. Then, compiler tools, loader tools and run time system take complete care of running the application on a Grid target architecture. This allows to move most of the cumbersome tasks related to Grid targeting and management from programmer responsibility to tools. This paper introduces the structured parallel programming environment ASSIST, whose design is aimed at raising the level of abstraction in Grid programming and discusses how it can support transparent Grid programming while implementing Grid adaptivity.},
Annote = {ISSN: 1505-0602},
Author = {Marco Aldinucci and Massimo Coppola and Marco Danelutto and Nicola Tonellotto and Marco Vanneschi and Corrado Zoccolo},
Date-Modified = {2012-08-14 15:26:55 +0000},
Journal = {Computational Methods in Science and Technology},
Number = {1},
Owner = {aldinuc},
Pages = {21--32},
Title = {High level grid programming with {ASSIST}},
pdf = {http://calvados.di.unipi.it/storage/paper_files/2006_assist_j_cmst.pdf},
Volume = {12},
Year = {2006},
Bdsk-Url-1 = {http://calvados.di.unipi.it/storage/paper_files/2006_assist_j_cmst.pdf}}

M. Aldinucci, M. Coppola, M. Danelutto, M. Vanneschi, and C. Zoccolo, “ASSIST as a research framework for high-performance grid programming environments,” in Grid computing: software environments and tools, J. C. Cunha and O. F. Rana, Eds., Springer, 2006, pp. 230-256. doi:10.1007/1-84628-339-6_10
[Abstract] [BibTeX] [Download PDF]

@incollection{assist:cunhabook:05,
Abstract = {ASSIST is a programming environment supporting the development of parallel and distributed high-performance applications on a wide range of target architectures including massively parallel clusters/networks of workstations and Grids. We discuss how ASSIST can act as a valid research vehicle to study, experiment and realize Grid-aware programming environments for high-performance applications. Special emphasis is put on the innovative methodologies, strategies and tools for dynamically adaptive applications that represent the necessary step for the success of Grid platforms.
We start considering which are the fundamental features of Grid-aware programming environments, based upon structured parallel programming and components technology. Then we show how ASSIST evolved from its very first version, only targeting workstation clusters, to the current version, targeting Grids and solving many critical problems related to expressive power, flexibility, interoperability and efficiency. We also discuss how ASSIST deals with interoperability issues. Eventually we discuss how an ASSIST-based model for supporting dynamically adaptive applications can be derived.},
Author = {Marco Aldinucci and Massimo Coppola and Marco Danelutto and Marco Vanneschi and Corrado Zoccolo},
Booktitle = {Grid Computing: Software environments and Tools},
Chapter = {10},
Date-Modified = {2014-06-22 10:12:07 +0000},
Doi = {10.1007/1-84628-339-6_10},
Editor = {J. C. Cunha and O. F. Rana},
Isbn = {978-1-85233-998-2},
Month = jan,
Pages = {230--256},
Publisher = {Springer},
Title = {{ASSIST} as a research framework for high-performance Grid programming environments},
pdf = {http://calvados.di.unipi.it/storage/paper_files/2005_assist_CuhnaBook.pdf},
Year = {2006},
Bdsk-Url-1 = {http://calvados.di.unipi.it/storage/paper_files/2005_assist_CuhnaBook.pdf},
Bdsk-Url-2 = {http://dx.doi.org/10.1007/1-84628-339-6_10}}

M. Aldinucci, M. Coppola, S. Campa, M. Danelutto, M. Vanneschi, and C. Zoccolo, “Structured implementation of component based grid programming environments,” in Future generation grids, V. Getov, D. Laforenza, and A. Reinefeld, Eds., Springer, 2006, pp. 217-239. doi:10.1007/978-0-387-29445-2_12
[Abstract] [BibTeX] [Download PDF]

@incollection{assist:dagstuhl:05,
Abstract = {The design, implementation and deployment of efficient high performance applications on Grids is usually a quite hard task, even in the case that modern and efficient grid middleware systems are used. We claim that most of the difficulties involved in such process can be moved away from programmer responsibility by following a structured programming model approach. The proposed approach relies on the development of a layered, component based execution environment. Each layer deals with distinct features and problems related to the implementation of GRID applications, exploiting the more appropriate techniques. Static optimizations are introduced in the compile layer, dynamic optimizations are introduced in the run time layer, whereas modern grid middleware features are simply exploited using standard middleware systems as the final target architecture. We first discuss the general idea, then we discuss the peculiarities of the approach and eventually we discuss the preliminary results achieved in the GRID.it project, where a prototype high performance, component based, GRID programming environment is being developed using this approach.},
Author = {Marco Aldinucci and Massimo Coppola and Sonia Campa and Marco Danelutto and Marco Vanneschi and Corrado Zoccolo},
Booktitle = {Future Generation Grids},
Date-Modified = {2012-11-24 09:27:00 +0000},
Doi = {10.1007/978-0-387-29445-2_12},
Editor = {Vladimir Getov and Domenico Laforenza and Alexander Reinefeld},
Isbn = {978-0-387-27935-0},
Pages = {217--239},
Publisher = {Springer},
Series = {CoreGRID},
Title = {Structured implementation of component based grid programming environments},
pdf = {http://calvados.di.unipi.it/storage/paper_files/2005_assist_Dagstuhl.pdf},
Year = {2006},
Bdsk-Url-1 = {http://calvados.di.unipi.it/storage/paper_files/2005_assist_Dagstuhl.pdf},
Bdsk-Url-2 = {http://dx.doi.org/10.1007/978-0-387-29445-2_12}}

M. Aldinucci, G. Antoniu, M. Danelutto, and M. Jan, “Fault-tolerant data sharing for high-level grid programming: a hierarchical storage architecture,” in Proc. of the integrated research in grid computing workshop, Kraków, Poland, 2006, pp. 177-188.
[Abstract] [BibTeX] [Download PDF]

@inproceedings{assist:juxmem:IW:06,
Abstract = {Enabling high-level programming models on grids is today a major challenge. A way to achieve this goal relies on the use of environments able to transparently and automatically provide adequate support for low-level, grid-specific issues (fault-tolerance, scalability, etc.). This paper discusses the above approach when applied to grid data management. As a case study, we propose a 2-tier software architecture that supports transparent, fault-tolerant, grid-level data sharing in the ASSIST programming environment (University of Pisa), based on the JuxMem grid data sharing service (INRIA Rennes).},
Address = {Krak{\'o}w, Poland},
Author = {Marco Aldinucci and Gabriel Antoniu and Marco Danelutto and Mathieu Jan},
Booktitle = {Proc. of the Integrated Research in Grid Computing Workshop},
Date-Modified = {2012-11-18 17:23:11 +0000},
Editor = {Marian Bubak and Sergei Gorlatch and Thierry Priol},
Keywords = {Duplicate},
Month = oct,
Optannote = {ISBN: 83-9115141-6-1},
Pages = {177--188},
Publisher = {Academic Computing Centre {CYFRONET AGH}},
Series = {CoreGRID},
Title = {Fault-Tolerant Data Sharing for High-level Grid Programming: A Hierarchical Storage Architecture},
pdf = {http://calvados.di.unipi.it/storage/paper_files/2006_IW_juxadhocmem.pdf},
Year = {2006},
Bdsk-Url-1 = {http://calvados.di.unipi.it/storage/paper_files/2006_IW_juxadhocmem.pdf}}

M. Aldinucci, M. Danelutto, and M. Vanneschi, “Autonomic QoS in ASSIST grid-aware components,” in Proc. of intl. euromicro pdp 2006: parallel distributed and network-based processing, Montbéliard, France, 2006, pp. 221-230. doi:10.1109/PDP.2006.25
[Abstract] [BibTeX] [Download PDF]

@inproceedings{assist:qos:euromicro:06,
Abstract = {Current Grid-aware applications are developed on existing software infrastructures, such as Globus, by developers who are experts on Grid software implementation. Although many useful applications have been produced this way, this approach may hardly support the additional complexity to Quality of Service (QoS) control in real application. We describe the ASSIST programming environment, the prototype of parallel programming environment currently under development at our group, as a suitable basis to capture all the desired features for QoS control for the Grid. Grid applications, built as compositions of ASSIST components, are supported by an innovative Grid Abstract Machine, which includes essential abstractions of standard middleware services and a hierarchical Application Manager, which may be considered as an early prototype of Autonomic Manager.},
Address = {Montb{\'e}liard, France},
Author = {Marco Aldinucci and Marco Danelutto and Marco Vanneschi},
Booktitle = {Proc. of Intl. Euromicro PDP 2006: Parallel Distributed and network-based Processing},
Date-Modified = {2012-11-18 16:14:35 +0000},
Doi = {10.1109/PDP.2006.25},
Month = feb,
Pages = {221--230},
Publisher = {IEEE},
Title = {Autonomic {QoS} in {ASSIST} Grid-aware components},
pdf = {http://calvados.di.unipi.it/storage/paper_files/2006_QoS_PDP.pdf},
Year = {2006},
Bdsk-Url-1 = {http://calvados.di.unipi.it/storage/paper_files/2006_QoS_PDP.pdf},
Bdsk-Url-2 = {http://dx.doi.org/10.1109/PDP.2006.25}}

M. Aldinucci, M. Danelutto, A. Paternesi, R. Ravazzolo, and M. Vanneschi, “Building interoperable grid-aware ASSIST applications via WebServices,” in Parallel computing: current & future issues of high-end computing (proc. of PARCO 2005, malaga, spain), Germany, 2006, pp. 145-152.
[Abstract] [BibTeX] [Download PDF]

@inproceedings{assist:webs:parco:05,
Abstract = {The ASSIST environment provides a high-level programming toolkit for the grid. ASSIST applications are described by means of a coordination language, which can express arbitrary graphs of modules. These modules (or a graph of them) may be enclosed in components specifically designed for the grid (GRID.it components). In this paper we describe how ASSIST modules can be wired through standard Web Services, and how GRID.it components may be made available as standard Web Services.},
Address = {Germany},
Author = {Marco Aldinucci and Marco Danelutto and Andrea Paternesi and Roberto Ravazzolo and Marco Vanneschi},
Booktitle = {Parallel Computing: Current \& Future Issues of High-End Computing (Proc. of {PARCO 2005}, Malaga, Spain)},
Date-Modified = {2012-11-18 17:06:42 +0000},
Editor = {G. R. Joubert and W. E. Nagel and F. J. Peters and O. Plata and P. Tirado and E. Zapata},
Isbn = {3000173528},
Month = dec,
Pages = {145--152},
Publisher = {John von Neumann Institute for Computing},
Series = {NIC},
Title = {Building interoperable grid-aware {ASSIST} applications via {WebServices}},
pdf = {http://calvados.di.unipi.it/storage/paper_files/2005_ws_parco.pdf},
Volume = {33},
Year = {2006},
Bdsk-Url-1 = {http://calvados.di.unipi.it/storage/paper_files/2005_ws_parco.pdf}}

M. Aldinucci, G. Antoniu, M. Danelutto, and M. Jan, “Fault-tolerant data sharing for high-level grid programming: a hierarchical storage architecture,” Institute on Programming Model, CoreGRID – Network of Excellence, TR-0058, 2006.
[BibTeX] [Download PDF]

@techreport{coregrid:tr0058,
  author        = {Marco Aldinucci and Gabriel Antoniu and Marco Danelutto and Mathieu Jan},
  title         = {Fault-Tolerant Data Sharing for High-level Grid Programming: A Hierarchical Storage Architecture},
  institution   = {Institute on Programming Model, CoreGRID - Network of Excellence},
  number        = {TR-0058},
  month         = aug,
  year          = {2006},
  pdf           = {http://www.coregrid.net/mambo/images/stories/TechnicalReports/tr-0058.pdf},
  date-added    = {2007-09-25 13:15:55 +0200},
  date-modified = {2007-09-25 13:16:55 +0200},
  bdsk-url-1    = {http://www.coregrid.net/mambo/images/stories/TechnicalReports/tr-0058.pdf}
}

M. Aldinucci and M. Danelutto, “The cost of security in skeletal systems,” Università di Pisa, Dipartimento di Informatica, Italy, TR-06-03, 2006.
[BibTeX] [Download PDF]

@techreport{self:tr:06-03,
  author        = {Marco Aldinucci and Marco Danelutto},
  title         = {The cost of security in skeletal systems},
  institution   = {Universit{\`a} di Pisa, Dipartimento di Informatica, Italy},
  number        = {TR-06-03},
  month         = feb,
  year          = {2006},
  pdf           = {http://compass2.di.unipi.it/TR/Files/TR-06-03.pdf.gz},
  date-added    = {2007-10-23 22:54:04 +0200},
  date-modified = {2007-10-23 22:54:58 +0200},
  bdsk-url-1    = {http://compass2.di.unipi.it/TR/Files/TR-06-03.pdf.gz}
}

M. Danelutto and P. Dazzi, “Joint structured/non structured parallelism exploitation through data flow,” in Proc. of iccs: intl. conference on computational science, workshop on practical aspects of high-level parallel programming, Reading, UK, 2006. doi:10.1007/11758525_124
[BibTeX] [Download PDF]

@inproceedings{DaDa06PAPP,
  author        = {Marco Danelutto and Patrizio Dazzi},
  title         = {Joint structured/non structured parallelism exploitation through data flow},
  booktitle     = {Proc. of ICCS: Intl. Conference on Computational Science, Workshop on Practical Aspects of High-level Parallel Programming},
  editor        = {V. Alexandrov and D. van Albada and P. M. A. Sloot and J. Dongarra},
  series        = {LNCS},
  volume        = {3992},
  publisher     = {Springer},
  address       = {Reading, UK},
  month         = may,
  year          = {2006},
  doi           = {10.1007/11758525_124},
  pdf           = {http://springerlink.metapress.com/content/m786408106167147/fulltext.pdf},
  date-modified = {2008-02-20 18:08:53 +0100},
  bdsk-url-1    = {http://springerlink.metapress.com/content/m786408106167147/fulltext.pdf},
  bdsk-url-2    = {http://dx.doi.org/10.1007/11758525_124}
}

M. Danelutto, C. Migliore, and C. Pantaleo, “An alternative implementation schema for ASSIST parmod,” in Proc. of intl. euromicro pdp: parallel distributed and network-based processing, Montbéliard, France, 2006, pp. 56-63.
[BibTeX] [URL]

@inproceedings{DaMiPa06PDP,
Address = {Montb{\'e}liard, France},
Author = {Marco Danelutto and Castrenze Migliore and Cosimino Pantaleo},
Booktitle = {Proc. of Intl. Euromicro PDP: Parallel Distributed and network-based Processing},
Month = feb,
Pages = {56--63},
Publisher = {IEEE},
Title = {An Alternative Implementation Schema for {ASSIST} parmod},
Url = {http://www.di.unipi.it/~marcod},
Year = {2006},
Bdsk-Url-1 = {http://www.di.unipi.it/~marcod}}

M. Danelutto and M. Vanneschi, “A RISC approach to Grid,” in Engineering the grid, B. Di Martino, J. Dongarra, A. Hoisie, L. T. Yang, and H. Zima, Eds., ASP press, 2006.
[BibTeX] [URL]

@incollection{DanVan05RISC,
Author = {Marco Danelutto and Marco Vanneschi},
Booktitle = {Engineering the grid},
Chapter = {8},
Editor = {Di Martino, B. and J. Dongarra and A. Hoisie and L. T. Yang and H. Zima},
Month = jan,
Publisher = {ASP press},
Title = {A {RISC} approach to {Grid}},
Url = {http://www.di.unipi.it/~marcod/},
Year = {2006},
Bdsk-Url-1 = {http://www.di.unipi.it/~marcod/}}

M. Danelutto, M. Pasin, M. Vanneschi, P. Dazzi, L. Presti, and D. Laforenza, “PAL: towards a new approach to high level parallel programming,” in Proc. of the integrated research in grid computing workshop, Kraków, Poland, 2006, pp. 189-200.
[BibTeX]

@inproceedings{pal:IW:07,
Address = {Krak{\'o}w, Poland},
Author = {Marco Danelutto and Marcelo Pasin and Marco Vanneschi and Patrizio Dazzi and Luigi Presti and Domenico Laforenza},
Booktitle = {Proc. of the Integrated Research in Grid Computing Workshop},
Date-Added = {2007-09-23 13:48:21 +0200},
Date-Modified = {2007-09-25 12:48:27 +0200},
Editor = {Marian Bubak and Sergei Gorlatch and Thierry Priol},
Month = oct,
Pages = {189--200},
Publisher = {Academic Computing Centre {CYFRONET AGH}},
Series = {CoreGRID},
Title = {{PAL}: towards a new approach to high level parallel programming},
Year = {2006}}

N. Tonellotto, M. Coppola, M. Danelutto, M. Vanneschi, and C. Zoccolo, “Execution support of high performance heterogeneous component-based applications on the grid,” in Proc. of the integrated research in grid computing workshop, Kraków, Poland, 2006, pp. 201-212.
[BibTeX]

@inproceedings{coppola:Execsupport:IW:06,
Address = {Krak{\'o}w, Poland},
Author = {Nicola Tonellotto and Massimo Coppola and Marco Danelutto and Marco Vanneschi and Corrado Zoccolo},
Booktitle = {Proc. of the Integrated Research in Grid Computing Workshop},
Date-Modified = {2007-12-09 22:29:12 +0100},
Editor = {Marian Bubak and Sergei Gorlatch and Thierry Priol},
Month = oct,
Pages = {201--212},
Publisher = {Academic Computing Centre {CYFRONET AGH}},
Series = {CoreGRID},
Title = {Execution Support of High Performance Heterogeneous Component-Based Applications on the Grid},
Year = {2006}}

2005

M. Aldinucci, F. André, J. Buisson, S. Campa, M. Coppola, M. Danelutto, and C. Zoccolo, “Parallel program/component adaptivity management,” in Proc. of the integrated research in grid computing workshop, Pisa, Italy, 2005, pp. 95-104.
[Abstract] [BibTeX] [Download PDF]

@inproceedings{adaptivity:IW:05,
Abstract = {Grid computing platforms require to handle dynamic behaviour of computing resources within complex parallel applications. We introduce a formalization of adaptive behaviour that separates the abstract model of the application from the implementation design. We exemplify the abstract adaptation schema on two applications, and we show how two quite different approaches to adaptivity, the ASSIST environment and the AFPAC framework, easily map to this common schema.},
Address = {Pisa, Italy},
Author = {Marco Aldinucci and Francoise Andr{\'e} and J{\'e}r{\'e}my Buisson and Sonia Campa and Massimo Coppola and Marco Danelutto and Corrado Zoccolo},
Booktitle = {Proc. of the Integrated Research in Grid Computing Workshop},
Date-Modified = {2012-11-18 17:04:16 +0000},
Editor = {Sergei Gorlatch and Marco Danelutto},
Keywords = {Duplicate},
Month = nov,
Pages = {95--104},
Publisher = {Universit{\`a} di Pisa, Dipartimento di Informatica},
Title = {Parallel program/component adaptivity management},
pdf = {http://calvados.di.unipi.it/storage/paper_files/2006_IW_adapt.pdf},
Volume = {TR-05-22},
Year = {2005},
Bdsk-Url-1 = {http://calvados.di.unipi.it/storage/paper_files/2006_IW_adapt.pdf}}

M. Aldinucci, M. Danelutto, J. Dünnweber, and S. Gorlatch, “Optimization techniques for implementing parallel skeletons in distributed environments,” Institute on Programming Model, CoreGRID – Network of Excellence, TR-0001, 2005.
[BibTeX] [Download PDF]

@techreport{coregrid:tr0001,
  author      = {Marco Aldinucci and Marco Danelutto and Jan D{\"u}nnweber and Sergei Gorlatch},
  title       = {Optimization Techniques for Implementing Parallel Skeletons in Distributed Environments},
  institution = {Institute on Programming Model, CoreGRID - Network of Excellence},
  number      = {TR-0001},
  month       = jan,
  year        = {2005},
  pdf         = {http://www.coregrid.net/mambo/images/stories/TechnicalReports/tr-0001.pdf},
  bdsk-url-1  = {http://www.coregrid.net/mambo/images/stories/TechnicalReports/tr-0001.pdf}
}

M. Aldinucci, A. Petrocelli, E. Pistoletti, M. Torquati, M. Vanneschi, L. Veraldi, and C. Zoccolo, “Dynamic reconfiguration of grid-aware applications in ASSIST,” Università di Pisa, Dipartimento di Informatica, Italy, TR-05-05, 2005.
[BibTeX] [Download PDF]

@techreport{dyn:tr-05-05,
  author        = {Marco Aldinucci and Alessandro Petrocelli and Edoardo Pistoletti and Massimo Torquati and Marco Vanneschi and Luca Veraldi and Corrado Zoccolo},
  title         = {Dynamic reconfiguration of Grid-aware applications in {ASSIST}},
  institution   = {Universit{\`a} di Pisa, Dipartimento di Informatica, Italy},
  number        = {TR-05-05},
  month         = feb,
  year          = {2005},
  pdf           = {http://compass2.di.unipi.it/TR/Files/TR-05-05.pdf.gz},
  date-added    = {2007-10-23 23:03:17 +0200},
  date-modified = {2007-10-23 23:06:40 +0200},
  bdsk-url-1    = {http://compass2.di.unipi.it/TR/Files/TR-05-05.pdf.gz}
}

M. Aldinucci, A. Petrocelli, E. Pistoletti, M. Torquati, M. Vanneschi, L. Veraldi, and C. Zoccolo, “Dynamic reconfiguration of grid-aware applications in ASSIST,” in Proc. of 11th intl. euro-par 2005 parallel processing, 2005, pp. 771-781. doi:10.1007/11549468_84
[Abstract] [BibTeX] [Download PDF]

@inproceedings{dyn:europar:05,
Abstract = {Current grid-aware applications are implemented on top of low-level libraries by developers who are experts on grid middleware architecture. This approach can hardly support the additional complexity of QoS control in real applications. We discuss a novel approach used in the ASSIST programming environment to implement/guarantee user provided QoS contracts in a transparent and effective way. Our approach is based on the implementation of automatic run-time reconfiguration of ASSIST application executions triggered by mismatch between the user provided QoS contract and the actual performance values achieved.},
Author = {Marco Aldinucci and Alessandro Petrocelli and Edoardo Pistoletti and Massimo Torquati and Marco Vanneschi and Luca Veraldi and Corrado Zoccolo},
Booktitle = {Proc. of 11th Intl. Euro-Par 2005 Parallel Processing},
Date-Added = {2007-05-20 21:04:01 +0200},
Date-Modified = {2009-01-23 00:16:41 +0100},
Doi = {10.1007/11549468_84},
Editor = {J. C. Cunha and P. D. Medeiros},
Month = aug,
Pages = {771--781},
Publisher = {Springer},
Series = {LNCS},
Title = {Dynamic reconfiguration of grid-aware applications in {ASSIST}},
pdf = {http://calvados.di.unipi.it/storage/paper_files/2005_assist_dyn_europar.pdf},
Volume = {3648},
Year = {2005},
Bdsk-Url-1 = {http://calvados.di.unipi.it/storage/paper_files/2005_assist_dyn_europar.pdf},
Bdsk-Url-2 = {http://dx.doi.org/10.1007/11549468_84}}

M. Aldinucci, F. André, J. Buisson, S. Campa, M. Coppola, M. Danelutto, and C. Zoccolo, “Parallel program/component adaptivity management,” Institute on Programming Model, CoreGRID – Network of Excellence, TR-0014, 2005.
[BibTeX] [Download PDF]

@techreport{coregrid:tr0014,
  author        = {Marco Aldinucci and Francoise Andr{\'e} and J{\'e}r{\'e}my Buisson and Sonia Campa and Massimo Coppola and Marco Danelutto and Corrado Zoccolo},
  title         = {Parallel program/component adaptivity management},
  institution   = {Institute on Programming Model, CoreGRID - Network of Excellence},
  number        = {TR-0014},
  month         = sep,
  year          = {2005},
  pdf           = {http://www.coregrid.net/mambo/images/stories/TechnicalReports/tr-0014.pdf},
  date-modified = {2007-10-08 15:48:28 +0200},
  bdsk-url-1    = {http://www.coregrid.net/mambo/images/stories/TechnicalReports/tr-0014.pdf}
}

M. Aldinucci, M. Danelutto, G. Giaccherini, M. Torquati, and M. Vanneschi, “Towards a distributed scalable data service for the grid,” Università di Pisa, Dipartimento di Informatica, Italy, TR-05-21, 2005.
[BibTeX] [Download PDF]

@techreport{adhoc:tr-05-21,
  author        = {Marco Aldinucci and Marco Danelutto and Gianni Giaccherini and Massimo Torquati and Marco Vanneschi},
  title         = {Towards a distributed scalable data service for the Grid},
  institution   = {Universit{\`a} di Pisa, Dipartimento di Informatica, Italy},
  number        = {TR-05-21},
  month         = oct,
  year          = {2005},
  pdf           = {http://compass2.di.unipi.it/TR/Files/TR-05-21.pdf.gz},
  date-added    = {2007-10-23 23:00:34 +0200},
  date-modified = {2007-10-23 23:02:23 +0200},
  bdsk-url-1    = {http://compass2.di.unipi.it/TR/Files/TR-05-21.pdf.gz}
}

M. Aldinucci, M. Danelutto, J. Dünnweber, and S. Gorlatch, “Optimization techniques for skeletons on grids,” in Grid computing and new frontiers of high performance processing, L. Grandinetti, Ed., Elsevier, 2005, vol. 14, pp. 255-273. doi:10.1016/S0927-5452(05)80014-0
[Abstract] [BibTeX] [Download PDF]

@incollection{vigoni:fut_rmi:book:05,
Abstract = {Skeletons are common patterns of parallelism, such as farm and pipeline, that can be abstracted and offered to the application programmer as programming primitives. We describe the use and implementation of skeletons on emerging computational grids, with the skeleton system Lithium, based on Java and RMI, as our reference programming system. Our main contribution is the exploration of optimization techniques for implementing skeletons on grids based on an optimized, future-based RMI mechanism, which we integrate into the macro-dataflow evaluation mechanism of Lithium. We discuss three optimizations: 1) a lookahead mechanism that allows to process multiple tasks concurrently at each grid server and thereby increases the overall degree of parallelism, 2) a lazy taskbinding technique that reduces interactions between grid servers and the task dispatcher, and 3) dynamic improvements that optimize the collecting of results and the work-load balancing. We report experimental results that demonstrate the improvements due to our optimizations on various testbeds, including a heterogeneous grid-like environment.},
Author = {Marco Aldinucci and Marco Danelutto and Jan D{\"u}nnweber and Sergei Gorlatch},
Booktitle = {Grid Computing and New Frontiers of High Performance Processing},
Chapter = {2},
Date-Modified = {2012-09-23 11:03:01 +0000},
Doi = {10.1016/S0927-5452(05)80014-0},
Editor = {L. Grandinetti},
Isbn = {0-444-51999-8},
Issn = {0927-5452},
Month = oct,
Pages = {255--273},
Publisher = {Elsevier},
Series = {Advances in Parallel Computing},
Title = {Optimization techniques for skeletons on grids},
pdf = {http://calvados.di.unipi.it/storage/paper_files/2005_LithiumFutRMI_book.pdf},
Volume = {14},
Year = {2005},
Bdsk-Url-1 = {http://calvados.di.unipi.it/storage/paper_files/2005_LithiumFutRMI_book.pdf},
Bdsk-Url-2 = {http://dx.doi.org/10.1016/S0927-5452(05)80014-0}}

M. Aldinucci, M. Danelutto, A. Paternesi, R. Ravazzolo, and M. Vanneschi, “Building interoperable grid-aware ASSIST applications via Web Services,” Università di Pisa, Dipartimento di Informatica, Italy, TR-05-24, 2005.
[Abstract] [BibTeX] [Download PDF]

@techreport{assist:webs:tr-05-24,
Abstract = {The ASSIST environment provides a high-level programming toolkit for the grid. ASSIST applications are described by means of a coordination language, which can express arbitrary graphs of modules. These modules (or a graph of them) may be enclosed in components specifically designed for the grid (GRID.it components). In this paper we describe how ASSIST modules can be wired through standard Web Services, and how GRID.it components may be made available as standard Web Services.},
Author = {Marco Aldinucci and Marco Danelutto and Andrea Paternesi and Roberto Ravazzolo and Marco Vanneschi},
Date-Added = {2007-10-23 22:57:08 +0200},
Date-Modified = {2010-10-24 09:23:12 +0200},
Institution = {Universit{\`a} di Pisa, Dipartimento di Informatica, Italy},
Month = dec,
Number = {TR-05-24},
Title = {Building Interoperable Grid-aware {ASSIST} Applications via {Web Services}},
pdf = {http://compass2.di.unipi.it/TR/Files/TR-05-24.pdf.gz},
Year = {2005},
Bdsk-Url-1 = {http://compass2.di.unipi.it/TR/Files/TR-05-24.pdf.gz}}

M. Aldinucci, S. Campa, M. Coppola, M. Danelutto, D. Laforenza, D. Puppin, L. Scarponi, M. Vanneschi, and C. Zoccolo, “Components for high performance grid programming in Grid.it,” in Proc. of the intl. workshop on component models and systems for grid applications, Saint-Malo, France, 2005, pp. 19-38. doi:10.1007/0-387-23352-0_2
[Abstract] [BibTeX] [Download PDF]

@inproceedings{assist:stmalo:05,
Abstract = {This paper presents the main ideas of the high-performance component-based Grid programming environment of the Grid.it project. High-performance components are characterized by a programming model that integrates the concepts of structured parallelism, component interaction, compositionality, and adaptivity. We show that ASSIST, the prototype of parallel programming environment currently under development at our group, is a suitable basis to capture all the desired features of the component model in a flexible and efficient manner. For the sake of interoperability, ASSIST modules or programs are automatically encapsulated in standard frameworks; currently, we are experimenting Web Services and the CORBA Component Model. Grid applications, built as compositions of ASSIST components and possibly other existing (legacy) components, are supported by an innovative Grid Abstract Machine, that includes essential abstractions of standard middleware services and a hierarchical Application Manager (AM). AM supports static allocation and dynamic reallocation of adaptive applications according to a performance contract, a reconfiguration strategy, and a performance model.},
Address = {Saint-Malo, France},
Author = {Marco Aldinucci and Sonia Campa and Massimo Coppola and Marco Danelutto and Domenico Laforenza and Diego Puppin and Luca Scarponi and Marco Vanneschi and Corrado Zoccolo},
Booktitle = {Proc. of the Intl. Workshop on Component Models and Systems for Grid Applications},
Date-Modified = {2009-02-03 18:34:58 +0100},
Doi = {10.1007/0-387-23352-0_2},
Editor = {V. Getov and T. Kielmann},
Isbn = {978-0-387-23351-2},
Month = jan,
Pages = {19--38},
Publisher = {Springer},
Series = {CoreGRID},
Title = {Components for high performance Grid programming in {Grid.it}},
pdf = {http://calvados.di.unipi.it/storage/paper_files/2005_assist_ics_stmalo.pdf},
Year = {2005},
Bdsk-Url-1 = {http://calvados.di.unipi.it/storage/paper_files/2005_assist_ics_stmalo.pdf},
Bdsk-Url-2 = {http://dx.doi.org/10.1007/0-387-23352-0_2}}

M. Aldinucci and A. Benoit, “Towards the automatic mapping of ASSIST applications for the grid,” in Proc. of the integrated research in grid computing workshop, Pisa, Italy, 2005, pp. 59-68.
[Abstract] [BibTeX] [Download PDF]

@inproceedings{assist:pepa:IW:05,
Abstract = {One of the most promising technical innovations in present day computing is the invention of grid technologies which harness the computational power of widely distributed collections of computers. However, the programming and optimisation burden of a low level approach to grid computing is clearly unacceptable for large scale, complex applications. The development of grid applications can be simplified by using high-level programming environments. In the present work, we address the problem of the mapping of a high-level grid application onto the computational resources. In order to optimise the mapping of the application, we propose to automatically generate performance models from the application using the process algebra PEPA. We target in this work applications written with the high-level environment ASSIST, since the use of such a structured environment allows us to automate the study of the application more effectively.},
Address = {Pisa, Italy},
Author = {Marco Aldinucci and Anne Benoit},
Booktitle = {Proc. of the Integrated Research in Grid Computing Workshop},
Editor = {Sergei Gorlatch and Marco Danelutto},
Month = nov,
Pages = {59--68},
Publisher = {Universit{\`a} di Pisa, Dipartimento di Informatica},
Title = {Towards the Automatic Mapping of {ASSIST} Applications for the Grid},
pdf = {http://calvados.di.unipi.it/storage/paper_files/2006_IW_pepa.pdf},
Volume = {TR-05-22},
Year = {2005},
Bdsk-Url-1 = {http://calvados.di.unipi.it/storage/paper_files/2006_IW_pepa.pdf}}

M. Coppola, M. Danelutto, S. Lacour, C. Pérez, T. Priol, N. Tonellotto, and C. Zoccolo, “Towards a common deployment model for grid systems,” in Proc. of the integrated research in grid computing workshop, Pisa, Italy, 2005, pp. 31-40.
[BibTeX] [URL]

@inproceedings{CoDaLa06IW,
Address = {Pisa, Italy},
Author = {Massimo Coppola and Marco Danelutto and S{\'e}bastien Lacour and Christian P{\'e}rez and Thierry Priol and Nicola Tonellotto and Corrado Zoccolo},
Booktitle = {Proc. of the Integrated Research in Grid Computing Workshop},
Editor = {Sergei Gorlatch and Marco Danelutto},
Month = nov,
Pages = {31--40},
Publisher = {Universit{\`a} di Pisa, Dipartimento di Informatica},
Title = {Towards a common deployment model for Grid systems},
Url = {http://www.di.unipi.it/~marcod},
Volume = {TR-05-22},
Year = {2005},
Bdsk-Url-1 = {http://www.di.unipi.it/~marcod}}

M. Danelutto, “Irregularity handling via structured parallel programming,” Intl. journal of computational science and engineering, vol. 3-4, 2005.
[BibTeX] [URL]

@article{Da05IJCSE,
  author     = {Marco Danelutto},
  title      = {Irregularity handling via structured parallel programming},
  journal    = {Intl. Journal of Computational Science and Engineering},
  volume     = {3-4},
  year       = {2005},
  url        = {http://www.di.unipi.it/~marcod},
  bdsk-url-1 = {http://www.di.unipi.it/~marcod}
}

M. Danelutto and P. Dazzi, “A Java/Jini framework supporting stream parallel computations,” in Parallel computing: current & future issues of high-end computing (proc. of PARCO 2005, malaga, spain), Germany, 2005, pp. 681-688.
[BibTeX] [URL]

@inproceedings{DaDa05parco,
Address = {Germany},
Author = {Marco Danelutto and Patrizio Dazzi},
Booktitle = {Parallel Computing: Current \& Future Issues of High-End Computing (Proc. of {PARCO 2005}, Malaga, Spain)},
Date-Modified = {2007-09-25 12:57:15 +0200},
Editor = {G. R. Joubert and W. E. Nagel and F. J. Peters and O. Plata and P. Tirado and E. Zapata},
Month = dec,
Pages = {681--688},
Publisher = {John von Neumann Institute for Computing},
Series = {NIC},
Title = {A {Java/Jini} framework supporting stream parallel computations},
Url = {http://www.di.unipi.it/~marcod},
Volume = {33},
Year = {2005},
Bdsk-Url-1 = {http://www.di.unipi.it/~marcod}}

M. Danelutto, M. Vanneschi, C. Zoccolo, N. Tonellotto, S. Orlando, R. Baraglia, T. Fagni, D. Laforenza, and A. Paccosi, “HPC application execution on grids,” in Future generation grids, V. Getov, D. Laforenza, and A. Reinefeld, Eds., Springer, 2005, pp. 263-282.
[BibTeX]

@incollection{assist:deploy:dagstuhl:05,
Author = {Marco Danelutto and Marco Vanneschi and Corrado Zoccolo and Nicola Tonellotto and Salvatore Orlando and Ranieri Baraglia and Tiziano Fagni and Domenico Laforenza and Alessandro Paccosi},
Booktitle = {Future Generation Grids},
Date-Modified = {2007-12-09 22:28:29 +0100},
Editor = {V. Getov and D. Laforenza and A. Reinefeld},
Month = nov,
Opturl = {http://www.di.unipi.it/~marcod},
Pages = {263--282},
Publisher = {Springer},
Series = {CoreGRID},
Title = {{HPC} application execution on GRIDs},
Year = {2005}}

M. Danelutto, “QoS in parallel programming through application managers,” in Proc. of intl. euromicro pdp: parallel distributed and network-based processing, Lugano, Switzerland, 2005, pp. 282-289.
[BibTeX] [URL]

@inproceedings{muskel:qos:pdp:05,
Address = {Lugano, Switzerland},
Author = {Marco Danelutto},
Booktitle = {Proc. of Intl. Euromicro PDP: Parallel Distributed and network-based Processing},
Month = feb,
Pages = {282--289},
Publisher = {IEEE},
Title = {{QoS} in parallel programming through application managers},
Url = {http://www.di.unipi.it/~marcod},
Year = {2005},
Bdsk-Url-1 = {http://www.di.unipi.it/~marcod}}

J. Dünnweber, S. Gorlatch, S. Campa, M. Aldinucci, and M. Danelutto, “Using code parameters for component adaptations,” in Proc. of the integrated research in grid computing workshop, Pisa, Italy, 2005, pp. 49-57.
[Abstract] [BibTeX] [Download PDF]

@inproceedings{codeadapt:IW:05,
Abstract = {Adaptation means that the behavior of a software component is adjusted to application or platform-specific requirements: new components required in a particular application do not need to be developed from scratch when available components can be adapted accordingly. Instead of introducing a new adaptation syntax (as it is done, e.g., in AOP), we describe adaptations in the context of Java-based Higher-Order Components (HOCs). HOCs incorporate a code parameter plugin mechanism enabling adaptations on the grid. Our approach is illustrated using a case study of sequence alignment. We show how a HOC with the required provisions for data dependencies in this application can be generated by adapting a farm component, which is "embarrassingly parallel", i.e., free of data dependencies. This way, we could reuse the efficient farm implementation from the Lithium library, although our case study exhibits the wavefront pattern of parallelism which is different from the farm.},
Address = {Pisa, Italy},
Author = {Jan D{\"u}nnweber and Sergei Gorlatch and Sonia Campa and Marco Aldinucci and Marco Danelutto},
Booktitle = {Proc. of the Integrated Research in Grid Computing Workshop},
Date-Modified = {2009-02-03 20:12:52 +0100},
Editor = {Sergei Gorlatch and Marco Danelutto},
Month = nov,
Owner = {aldinuc},
Pages = {49--57},
Publisher = {Universit{\`a} di Pisa, Dipartimento di Informatica},
Timestamp = {2006.06.28},
Title = {Using Code Parameters for Component Adaptations},
pdf = {http://calvados.di.unipi.it/storage/paper_files/2006_IW_muenster.pdf},
Volume = {TR-05-22},
Year = {2005},
Bdsk-Url-1 = {http://calvados.di.unipi.it/storage/paper_files/2006_IW_muenster.pdf}}

J. Dünnweber, S. Gorlatch, S. Campa, M. Aldinucci, and M. Danelutto, “Behavior customization of parallel components application programming,” Institute on Programming Model, CoreGRID – Network of Excellence, TR-0002, 2005.
[BibTeX] [Download PDF]

@techreport{coregrid:tr0002,
  author        = {D{\"u}nnweber, Jan and Gorlatch, Sergei and Campa, Sonia and Aldinucci, Marco and Danelutto, Marco},
  title         = {Behavior Customization of Parallel Components Application Programming},
  institution   = {Institute on Programming Model, CoreGRID - Network of Excellence},
  number        = {TR-0002},
  month         = apr,
  year          = {2005},
  pdf           = {http://www.coregrid.net/mambo/images/stories/TechnicalReports/tr-0002.pdf},
  date-added    = {2014-06-21 15:22:57 +0000},
  date-modified = {2014-06-21 15:22:57 +0000},
  bdsk-url-1    = {http://www.coregrid.net/mambo/images/stories/TechnicalReports/tr-0002.pdf}
}

I. Merelli, L. Milanesi, D. D’Agostino, A. Clematis, M. Vanneschi, and M. Danelutto, “Using parallel isosurface extraction in superficial molecular modeling,” in 1st intl. conference on distributed frameworks for multimedia applications (dfma 2005), Besançon, France, 2005, pp. 288-294.
[BibTeX] [URL]

@inproceedings{marchingcubes:05,
Address = {Besan\c{c}on, France},
Author = {Ivan Merelli and Luciano Milanesi and Daniele D'Agostino and Andrea Clematis and Marco Vanneschi and Marco Danelutto},
Bibsource = {DBLP, http://dblp.uni-trier.de},
Booktitle = {1st Intl. Conference on Distributed Frameworks for Multimedia Applications (DFMA 2005)},
Isbn = {0-7695-2273-4},
Pages = {288--294},
Publisher = {IEEE},
Title = {Using Parallel Isosurface Extraction in Superficial Molecular Modeling},
Url = {http://doi.ieeecomputersociety.org/10.1109/DFMA.2005.51},
Year = {2005},
Bdsk-Url-1 = {http://doi.ieeecomputersociety.org/10.1109/DFMA.2005.51}}

M. Pasin, P. Kuonen, M. Danelutto, and M. Aldinucci, “Skeleton parallel programming and parallel objects,” in Proc. of the integrated research in grid computing workshop, Pisa, Italy, 2005, pp. 115-124.
[Abstract] [BibTeX] [Download PDF]

@inproceedings{pasin:IW:05,
Abstract = {We describe here the ongoing work aimed at integrating the POP-C++ parallel object programming environment with the ASSIST component based parallel programming environment. Both these programming environments are shortly outlined, first. Then several possibilities of integration are considered. For each one of these integration opportunities, the advantages and synergies that can be possibly achieved are outlined and discussed. Eventually, the current status of integration of the two environments is discussed, along with the expected results and fallouts on the two programming environments.},
Address = {Pisa, Italy},
Author = {Marcelo Pasin and Pierre Kuonen and Marco Danelutto and Marco Aldinucci},
Booktitle = {Proc. of the Integrated Research in Grid Computing Workshop},
Date-Modified = {2009-02-03 20:28:52 +0100},
Editor = {Sergei Gorlatch and Marco Danelutto},
Month = nov,
Owner = {aldinuc},
Pages = {115--124},
Publisher = {Universit{\`a} di Pisa, Dipartimento di Informatica},
Timestamp = {2006.06.28},
Title = {Skeleton Parallel Programming and Parallel Objects},
pdf = {http://calvados.di.unipi.it/storage/paper_files/2006_IW_popc.pdf},
Volume = {TR-05-22},
Year = {2005},
Bdsk-Url-1 = {http://calvados.di.unipi.it/storage/paper_files/2006_IW_popc.pdf}}

2004

M. Aldinucci and M. Torquati, “Accelerating apache farms through ad-HOC distributed scalable objects repository,” Università di Pisa, Dipartimento di Informatica, Italy, TR-04-08, 2004.
[BibTeX] [URL]

@techreport{adhoc:tr-04-08,
Author = {Marco Aldinucci and Massimo Torquati},
Date-Added = {2007-10-23 23:10:16 +0200},
Date-Modified = {2007-10-23 23:12:45 +0200},
Institution = {Universit{\`a} di Pisa, Dipartimento di Informatica, Italy},
Month = feb,
Number = {TR-04-08},
Title = {Accelerating {Apache} farms through {ad-HOC} distributed scalable objects repository},
Url = {http://compass2.di.unipi.it/TR/files/TR-04-08.ps.gz},
Year = {2004},
Bdsk-Url-1 = {http://compass2.di.unipi.it/TR/files/TR-04-08.ps.gz}}

M. Aldinucci and M. Torquati, “Accelerating apache farms through ad-HOC distributed scalable object repository,” in Proc. of 10th intl. euro-par 2004 parallel processing, 2004, pp. 596-605. doi:10.1007/978-3-540-27866-5_78
[Abstract] [BibTeX] [Download PDF]

@inproceedings{assist:adhoc:europar:04,
Abstract = {We present HOC: a fast, scalable object repository providing programmers with a general storage module. hoc may be used to implement DSMs as well as distributed cache subsystems. HOC is composed of a set of hot-pluggable cooperating processes that may sustain a close to optimal network traffic rate. We designed an HOC-based Web cache that extends the Apache Web server and remarkably improves Apache farms performances with no modification to the Apache core code.},
Author = {Marco Aldinucci and Massimo Torquati},
Booktitle = {Proc. of 10th Intl. Euro-Par 2004 Parallel Processing},
Date-Modified = {2012-07-13 19:06:26 +0200},
Doi = {10.1007/978-3-540-27866-5_78},
Editor = {Marco Danelutto and Marco Vanneschi and Domenico Laforenza},
Month = aug,
Pages = {596--605},
Publisher = {Springer},
Series = {LNCS},
Title = {Accelerating {Apache} farms through {ad-HOC} distributed scalable object repository},
pdf = {http://calvados.di.unipi.it/storage/paper_files/2004_hoc_europar.pdf},
Volume = {3149},
Year = {2004},
Bdsk-Url-1 = {http://calvados.di.unipi.it/storage/paper_files/2004_hoc_europar.pdf},
Bdsk-Url-2 = {http://dx.doi.org/10.1007/978-3-540-27866-5_78}}

M. Aldinucci, S. Campa, M. Coppola, S. Magini, P. Pesciullesi, L. Potiti, R. Ravazzolo, M. Torquati, and C. Zoccolo, “Targeting heterogeneous architectures in ASSIST: experimental results,” in Proc. of 10th intl. euro-par 2004 parallel processing, 2004, pp. 638-643. doi:10.1142/S0129626412400063
[Abstract] [BibTeX] [Download PDF]

@inproceedings{assist:hetero:europar:04,
Abstract = {We describe how the ASSIST parallel programming environment can be used to run parallel programs on collections of heterogeneous workstations and evaluate the scalability of one task-farm real application and a data-parallel benchmark, comparing the actual performance figures measured when using homogeneous and heterogeneous workstation clusters. We describe also the ASSIST approach to heterogeneous distributed shared memory and provide preliminary performance figures of the current implementation.},
Author = {Marco Aldinucci and Sonia Campa and Massimo Coppola and Silvia Magini and Paolo Pesciullesi and Laura Potiti and Roberto Ravazzolo and Massimo Torquati and Corrado Zoccolo},
Booktitle = {Proc. of 10th Intl. Euro-Par 2004 Parallel Processing},
Date-Modified = {2009-02-04 17:56:42 +0100},
Doi = {10.1142/S0129626412400063},
Editor = {Marco Danelutto and Marco Vanneschi and Domenico Laforenza},
Isbn = {978-3-540-22924-7},
Month = aug,
Pages = {638--643},
Publisher = {Springer},
Series = {LNCS},
Title = {Targeting heterogeneous architectures in {ASSIST}: Experimental results},
pdf = {http://calvados.di.unipi.it/storage/paper_files/2004_hetero_europar.pdf},
Volume = {3149},
Year = {2004},
Bdsk-Url-1 = {http://calvados.di.unipi.it/storage/paper_files/2004_hetero_europar.pdf},
Bdsk-Url-2 = {http://dx.doi.org/10.1142/S0129626412400063}}

M. Aldinucci, S. Campa, P. Ciullo, M. Coppola, M. Danelutto, P. Pesciullesi, R. Ravazzolo, M. Torquati, M. Vanneschi, and C. Zoccolo, “A framework for experimenting with structured parallel programming environment design,” in Parallel computing: software technology, algorithms, architectures and applications (proc. of PARCO 2003, dresden, germany), 2004, pp. 617-624. doi:10.1016/S0927-5452(04)80077-7
[Abstract] [BibTeX] [Download PDF]

@inproceedings{assist:parco:03,
Abstract = {ASSIST is a parallel programming environment aimed at providing programmers of complex parallel application with a suitable and effective programming tool. Being based on algorithmic skeletons and coordination languages technologies, the programming environment relieves the programmer from a number of cumbersome, error prone activities that are required when using traditional parallel programming environments. ASSIST has been specifically designed to be easily customizable in order to experiment different implementation techniques, solutions, algorithms or back-ends any time new features are required or new technologies become available. In this work we discuss how this goal has been achieved and how the current ASSIST programming environment has been already used to experiment solutions not implemented in the first version of the tool.},
Author = {Marco Aldinucci and Sonia Campa and Pierpaolo Ciullo and Massimo Coppola and Marco Danelutto and Paolo Pesciullesi and Roberto Ravazzolo and Massimo Torquati and Marco Vanneschi and Corrado Zoccolo},
Booktitle = {Parallel Computing: Software Technology, Algorithms, Architectures and Applications (Proc. of {PARCO 2003}, Dresden, Germany)},
Date-Modified = {2012-11-26 18:49:59 +0000},
Doi = {10.1016/S0927-5452(04)80077-7},
Editor = {G. R. Joubert and W. E. Nagel and F. J. Peters and W. V. Walter},
Issn = {0927-5452},
Pages = {617--624},
Publisher = {Elsevier},
Series = {Advances in Parallel Computing},
Title = {A framework for experimenting with structured parallel programming environment design},
pdf = {http://calvados.di.unipi.it/storage/paper_files/2004_assist_parco03.pdf},
Volume = {13},
Year = {2004},
Bdsk-Url-1 = {http://calvados.di.unipi.it/storage/paper_files/2004_assist_parco03.pdf},
Bdsk-Url-2 = {http://dx.doi.org/10.1016/S0927-5452(04)80077-7}}

M. Aldinucci, M. Coppola, M. Danelutto, M. Vanneschi, and C. Zoccolo, “ASSIST as a research framework for high-performance grid programming environments,” Università di Pisa, Dipartimento di Informatica, Italy, TR-04-09, 2004.
[BibTeX] [URL]

@techreport{assist:tr-04-09,
Author = {Marco Aldinucci and Massimo Coppola and Marco Danelutto and Marco Vanneschi and Corrado Zoccolo},
Date-Added = {2007-10-23 23:07:38 +0200},
Date-Modified = {2007-10-23 23:09:22 +0200},
Institution = {Universit{\`a} di Pisa, Dipartimento di Informatica, Italy},
Month = feb,
Number = {TR-04-09},
Title = {{ASSIST} as a Research Framework for High-performance Grid Programming Environments},
Url = {http://compass2.di.unipi.it/TR/files/TR-04-09.ps.gz},
Year = {2004},
Bdsk-Url-1 = {http://compass2.di.unipi.it/TR/files/TR-04-09.ps.gz}}

S. Campa and M. Danelutto, “A framework for orthogonal data and control parallelism exploitation,” in Proc. of intl. conference on computation science and its application, Assisi, Italy, 2004, pp. 206-213.
[BibTeX] [URL]

@inproceedings{CaDa04ICCA,
Address = {Assisi, Italy},
Author = {Sonia Campa and Marco Danelutto},
Booktitle = {Proc. of Intl. Conference on Computational Science and its Applications},
Month = may,
Pages = {206--213},
Publisher = {Springer},
Series = {LNCS},
Title = {A framework for orthogonal data and control parallelism exploitation},
Url = {http://www.di.unipi.it/~campa},
Volume = {3044},
Year = {2004},
Bdsk-Url-1 = {http://www.di.unipi.it/~campa}}

M. Danelutto, “Adaptive task farm implementation strategies,” in Proc. of intl. euromicro pdp: parallel distributed and network-based processing, La Coruna, Spain, 2004, pp. 416-423.
[BibTeX] [URL]

@inproceedings{Da04pdp,
Address = {La Coru{\~n}a, Spain},
Author = {Marco Danelutto},
Booktitle = {Proc. of Intl. Euromicro PDP: Parallel Distributed and network-based Processing},
Month = feb,
Pages = {416--423},
Publisher = {IEEE},
Title = {Adaptive Task Farm Implementation Strategies},
Url = {http://www.di.unipi.it/~marcod},
Year = {2004},
Bdsk-Url-1 = {http://www.di.unipi.it/~marcod}}

M. Danelutto, R. Esposito, and L. Verdoscia, “CODACS prototype: CHIARA language and its compiler,” in Proc. of intl. workshop on embedded computing systems (in conjunction with the icdcs’2004), Tokyo, Japan, 2004.
[BibTeX] [URL]

@inproceedings{DaEsVe04ECS,
Address = {Tokyo, Japan},
Author = {Marco Danelutto and Raffaele Esposito and Lorenzo Verdoscia},
Booktitle = {Proc. of Intl. Workshop on Embedded Computing Systems (in conjunction with the ICDCS'2004)},
Month = mar,
Title = {{CODACS} prototype: {CHIARA} language and its compiler},
Url = {http://www.di.unipi.it/~marcod},
Year = {2004},
Bdsk-Url-1 = {http://www.di.unipi.it/~marcod}}

Proc. of 10th intl. euro-par 2004 parallel processing, Pisa, Italy: Springer, 2004.
[BibTeX]

@proceedings{europar:04,
  editor        = {Danelutto, Marco and Vanneschi, Marco and Laforenza, Domenico},
  title         = {Proc. of 10th Intl. Euro-Par 2004 Parallel Processing},
  series        = {LNCS},
  volume        = {3149},
  publisher     = {Springer},
  address       = {Pisa, Italy},
  month         = aug,
  year          = {2004},
  date-added    = {2007-10-09 15:08:01 +0200},
  date-modified = {2007-10-09 15:09:25 +0200}
}

M. Aldinucci, M. Danelutto, and J. Dünnweber, “Optimization techniques for implementing parallel skeletons in grid environments,” in Proc. of cmpp: intl. workshop on constructive methods for parallel programming, Stirling, Scotland, UK, 2004, pp. 35-47.
[Abstract] [BibTeX] [Download PDF]

@inproceedings{lith_rmi:cmpp:04,
Abstract = {Skeletons are common patterns of parallelism like, e.g., farm, pipeline that can be abstracted and offered to the application programmer as programming primitives. We describe the use and implementation of skeletons in a distributed grid environment, with the Java-based system Lithium as our reference implementation. Our main contribution are optimization techniques based on an asynchronous, optimized RMI interaction mechanism, which we integrated into the macro data flow (MDF) implementation technology of Lithium. We report initial experimental results that demonstrate the achieved improvements through the proposed optimizations on a simple grid testbed.},
Address = {Stirling, Scotland, UK},
Author = {Marco Aldinucci and Marco Danelutto and Jan D{\"u}nnweber},
Booktitle = {Proc. of CMPP: Intl. Workshop on Constructive Methods for Parallel Programming},
Date-Modified = {2007-09-16 18:42:29 +0200},
Editor = {S. Gorlatch},
Month = jul,
Pages = {35--47},
Publisher = {Universit{\"a}t M{\"u}nster, Germany},
Title = {Optimization Techniques for Implementing Parallel Skeletons in Grid Environments},
pdf = {http://calvados.di.unipi.it/storage/paper_files/2004_RMI_cmpp.pdf},
Year = {2004},
Bdsk-Url-1 = {http://calvados.di.unipi.it/storage/paper_files/2004_RMI_cmpp.pdf}}

M. Aldinucci and M. Danelutto, “An operational semantics for skeletons,” in Parallel computing: software technology, algorithms, architectures and applications (proc. of PARCO 2003, dresden, germany), Germany, 2004, pp. 63-70. doi:10.1016/S0927-5452(04)80011-X
[Abstract] [BibTeX] [Download PDF]

@inproceedings{lith:sem:parco:03,
Abstract = {A major weakness of the current programming systems based on skeletons is that parallel semantics is usually provided in an informal way, thus preventing any formal comparison about program behavior. We describe a schema suitable for the description of both functional and parallel semantics of skeletal languages which is aimed at filling this gap. The proposed schema of semantics represents a handy framework to prove the correctness and validate different rewriting rules. These can be used to transform a skeleton program into a functionally equivalent but possibly faster version.},
Address = {Germany},
Author = {Marco Aldinucci and Marco Danelutto},
Booktitle = {Parallel Computing: Software Technology, Algorithms, Architectures and Applications (Proc. of {PARCO 2003}, Dresden, Germany)},
Date-Modified = {2012-07-15 14:39:27 +0000},
Doi = {10.1016/S0927-5452(04)80011-X},
Editor = {G. R. Joubert and W. E. Nagel and F. J. Peters and W. V. Walter},
Pages = {63--70},
Publisher = {Elsevier},
Series = {Advances in Parallel Computing},
Title = {An operational semantics for skeletons},
pdf = {http://calvados.di.unipi.it/storage/paper_files/2004_sem_parco03.pdf},
Volume = {13},
Year = {2004},
Bdsk-Url-1 = {http://calvados.di.unipi.it/storage/paper_files/2004_sem_parco03.pdf},
Bdsk-Url-2 = {http://dx.doi.org/10.1016/S0927-5452(04)80011-X}}

2003

M. Aldinucci, S. Campa, P. Ciullo, M. Coppola, M. Danelutto, P. Pesciullesi, R. Ravazzolo, M. Torquati, M. Vanneschi, and C. Zoccolo, “ASSIST demo: a high level, high performance, portable, structured parallel programming environment at work,” in Proc. of 9th intl. euro-par 2003 parallel processing, Klagenfurt, Austria, 2003, pp. 1295-1300. doi:10.1007/978-3-540-45209-6_176
[Abstract] [BibTeX] [Download PDF]

@inproceedings{assist:demo:europar:03,
Abstract = {This work summarizes the possibilities offered by parallel programming environment ASSIST by outlining some of the features that will be demonstrated at the conference demo session. We'll substantially show how this environment can be deployed on a Linux workstation network/cluster, how applications can be compiled and run using ASSIST and eventually, we'll discuss some ASSIST scalability and performance features. We'll also outline how the ASSIST environment can be used to target GRID architectures.},
Address = {Klagenfurt, Austria},
Author = {Marco Aldinucci and Sonia Campa and Pierpaolo Ciullo and Massimo Coppola and Marco Danelutto and Paolo Pesciullesi and Roberto Ravazzolo and Massimo Torquati and Marco Vanneschi and Corrado Zoccolo},
Booktitle = {Proc. of 9th Intl. Euro-Par 2003 Parallel Processing},
Date-Modified = {2012-11-10 02:24:20 +0000},
Doi = {10.1007/978-3-540-45209-6_176},
Editor = {H. Kosch and L. B{\"o}sz{\"o}rm{\'e}nyi and H. Hellwagner},
Month = aug,
Pages = {1295--1300},
Publisher = {Springer},
Series = {LNCS},
Title = {{ASSIST} demo: a high level, high performance, portable, structured parallel programming environment at work},
pdf = {http://calvados.di.unipi.it/storage/paper_files/2003_assist_demo_europar.pdf},
Volume = {2790},
Year = {2003},
Bdsk-Url-1 = {http://calvados.di.unipi.it/storage/paper_files/2003_assist_demo_europar.pdf},
Bdsk-Url-2 = {http://dx.doi.org/10.1007/978-3-540-45209-6_176}}

M. Aldinucci, S. Campa, P. Ciullo, M. Coppola, S. Magini, P. Pesciullesi, L. Potiti, R. Ravazzolo, M. Torquati, M. Vanneschi, and C. Zoccolo, “The implementation of ASSIST, an environment for parallel and distributed programming,” in Proc. of 9th intl euro-par 2003 parallel processing, Klagenfurt, Austria, 2003, pp. 712-721. doi:10.1007/b12024
[Abstract] [BibTeX] [Download PDF]

@inproceedings{assist:imp:europar:03,
Abstract = {We describe the implementation of ASSIST, a programming environment for parallel and distributed programs. Its coordination language is based on the parallel skeleton model, extended with new features to enhance expressiveness, parallel software reuse, software component integration and interfacing to external resources. The compilation process and the structure of the run-time support of ASSIST are discussed with respect to the issues introduced by the new characteristics, presenting an analysis of the first test results.},
Address = {Klagenfurt, Austria},
Author = {Marco Aldinucci and Sonia Campa and Pierpaolo Ciullo and Massimo Coppola and Silvia Magini and Paolo Pesciullesi and Laura Potiti and Roberto Ravazzolo and Massimo Torquati and Marco Vanneschi and Corrado Zoccolo},
Booktitle = {Proc. of 9th Intl. Euro-Par 2003 Parallel Processing},
Date-Modified = {2010-10-24 15:29:07 +0200},
Doi = {10.1007/b12024},
Editor = {H. Kosch and L. B{\"o}sz{\"o}rm{\'e}nyi and H. Hellwagner},
Isbn = {978-3-540-40788-1},
Month = aug,
Pages = {712--721},
Publisher = {Springer},
Series = {LNCS},
Title = {The Implementation of {ASSIST}, an Environment for Parallel and Distributed Programming},
pdf = {http://calvados.di.unipi.it/storage/paper_files/2003_assist_imp_europar.pdf},
Volume = {2790},
Year = {2003},
Bdsk-Url-1 = {http://calvados.di.unipi.it/storage/paper_files/2003_assist_imp_europar.pdf},
Bdsk-Url-2 = {http://dx.doi.org/10.1007/b12024}}

M. Aldinucci, M. Danelutto, and P. Teti, “An advanced environment supporting structured parallel programming in Java,” Future generation computer systems, vol. 19, iss. 5, pp. 611-626, 2003. doi:10.1016/S0167-739X(02)00172-3
[Abstract] [BibTeX] [Download PDF]

@article{lithium:fgcs:03,
Abstract = {In this work we present Lithium, a pure Java structured parallel programming environment based on skeletons (common, reusable and efficient parallelism exploitation patterns). Lithium is implemented as a Java package and represents both the first skeleton based programming environment in Java and the first complete skeleton based Java environment exploiting macro-data flow implementation techniques.
Lithium supports a set of user code optimizations which are based on skeleton rewriting techniques. These optimizations improve both absolute performance and resource usage with respect to original user code. Parallel programs developed using the library run on any network of workstations provided the workstations support plain JRE. The paper describes the library implementation, outlines the optimization techniques used and eventually presents the performance results obtained on both synthetic and real applications.},
Author = {Marco Aldinucci and Marco Danelutto and Paolo Teti},
Date-Modified = {2014-08-24 22:16:31 +0000},
Doi = {10.1016/S0167-739X(02)00172-3},
Journal = {Future Generation Computer Systems},
Month = jul,
Number = {5},
Pages = {611--626},
Title = {An advanced environment supporting structured parallel programming in {Java}},
pdf = {http://calvados.di.unipi.it/storage/paper_files/2003_lithium_fgcs.pdf},
Volume = {19},
Year = {2003},
Bdsk-Url-1 = {http://calvados.di.unipi.it/storage/paper_files/2003_lithium_fgcs.pdf},
Bdsk-Url-2 = {http://dx.doi.org/10.1016/S0167-739X(02)00172-3}}

R. Baraglia, M. Danelutto, D. Laforenza, S. Orlando, P. Palmerini, R. Perego, P. Pesciullesi, and M. Vanneschi, “AssistConf: a grid configuration tool for the ASSIST parallel programming environment,” in Proc. of intl. euromicro pdp: parallel distributed and network-based processing, Genova, Italy, 2003, pp. 193-200.
[BibTeX]

@inproceedings{BaDaLaOr03pdp,
Address = {Genova, Italy},
Author = {Ranieri Baraglia and Marco Danelutto and Domenico Laforenza and Salvatore Orlando and Paolo Palmerini and Raffaele Perego and Paolo Pesciullesi and Marco Vanneschi},
Booktitle = {Proc. of Intl. Euromicro PDP: Parallel Distributed and network-based Processing},
Month = feb,
Pages = {193--200},
Publisher = {IEEE},
Title = {{AssistConf}: A Grid Configuration Tool for the {ASSIST} Parallel Programming Environment},
Year = {2003}}

P. D’Ambra, M. Danelutto, D. di Serafino, and M. Lapegna, “Integrating MPI-based numerical software into an advanced parallel computing environment,” in Proc. of intl. euromicro pdp: parallel distributed and network-based processing, Genova, Italy, 2003, pp. 283-291.
[BibTeX]

@inproceedings{dambra:lib:pdp:02,
Address = {Genova, Italy},
Author = {Pasqua D'Ambra and Marco Danelutto and Daniela di Serafino and Marco Lapegna},
Booktitle = {Proc. of Intl. Euromicro PDP: Parallel Distributed and network-based Processing},
Month = feb,
Opturl = {http://www.di.unipi.it/~marcod},
Pages = {283--291},
Publisher = {IEEE},
Title = {Integrating {MPI}-Based Numerical Software into an Advanced Parallel Computing Environment},
Year = {2003}}

M. Danelutto, “HPC the easy way: new technologies for high performance applications deployment,” Journal of systems architecture, vol. 49, iss. 10-11, pp. 399-419, 2003.
[BibTeX] [URL]

@article{Da03JSA,
Author = {Marco Danelutto},
Journal = {Journal of Systems Architecture},
Month = nov,
Number = {10-11},
Pages = {399--419},
Title = {{HPC} the easy way: new technologies for high performance applications deployment},
Url = {http://www.di.unipi.it/~marcod},
Volume = {49},
Year = {2003},
Bdsk-Url-1 = {http://www.di.unipi.it/~marcod}}

M. Danelutto, “HPC the easy way: new technologies for high performance applications deployment (invited talk),” in Proc. of intl. euromicro pdp: parallel distributed and network-based processing, 2003, p. 180-.
[BibTeX] [URL]

@inproceedings{DBLP:conf/pdp/Danelutto03,
Address = {Genova, Italy},
Author = {Marco Danelutto},
Bibsource = {DBLP, http://dblp.uni-trier.de},
Booktitle = {Proc. of Intl. Euromicro PDP: Parallel Distributed and network-based Processing},
Date-Modified = {2007-11-03 14:36:32 +0100},
Isbn = {0-7695-1875-3},
Month = feb,
Pages = {180-},
Publisher = {IEEE},
Title = {{HPC} the easy way: new technologies for high performance applications deployment (invited talk)},
Url = {http://doi.ieeecomputersociety.org/10.1109/EMPDP.2003.1183585},
Year = {2003},
Bdsk-Url-1 = {http://doi.ieeecomputersociety.org/10.1109/EMPDP.2003.1183585}}

2002

M. Aldinucci and M. Danelutto, “An operational semantic for skeletons,” Università di Pisa, Dipartimento di Informatica, Italy, TR-02-13, 2002.
[BibTeX] [URL]

@techreport{semantics:tr-02-13,
  author        = {Aldinucci, Marco and Danelutto, Marco},
  title         = {An operational semantic for skeletons},
  institution   = {Universit{\`a} di Pisa, Dipartimento di Informatica, Italy},
  number        = {TR-02-13},
  month         = jul,
  year          = {2002},
  url           = {http://compass2.di.unipi.it/TR/files/TR-02-13.ps.gz},
  date-added    = {2007-10-23 23:14:08 +0200},
  date-modified = {2007-10-23 23:16:36 +0200},
  bdsk-url-1    = {http://compass2.di.unipi.it/TR/files/TR-02-13.ps.gz}
}

P. D’Ambra, M. Danelutto, and D. di Serafino, “Advanced environments for parallel and distributed computing,” Parallel computing, vol. 28, iss. 12, pp. 1635-1636, 2002.
[BibTeX]

@article{DBLP:journals/pc/DAmbraDS02,
Author = {Pasqua D'Ambra and Marco Danelutto and Daniela di Serafino},
Bibsource = {DBLP, http://dblp.uni-trier.de},
Doi = {10.1016/S0167-8191(02)00183-7},
Ee = {http://dx.doi.org/10.1016/S0167-8191(02)00183-7},
Journal = {Parallel Computing},
Number = {12},
Pages = {1635--1636},
Title = {Advanced environments for parallel and distributed computing},
Volume = {28},
Year = {2002}}

P. D’Ambra, M. Danelutto, D. di Serafino, and M. Lapegna, “Advanced environments for parallel and distributed applications: a view of current status,” Parallel computing, vol. 28, iss. 12, pp. 1637-1662, 2002.
[BibTeX]

@article{DBLP:journals/pc/DAmbraDSL02,
Author = {Pasqua D'Ambra and Marco Danelutto and Daniela di Serafino and Marco Lapegna},
Bibsource = {DBLP, http://dblp.uni-trier.de},
Doi = {10.1016/S0167-8191(02)00199-0},
Ee = {http://dx.doi.org/10.1016/S0167-8191(02)00199-0},
Journal = {Parallel Computing},
Number = {12},
Pages = {1637--1662},
Title = {Advanced environments for parallel and distributed applications: a view of current status},
Volume = {28},
Year = {2002}}

M. Danelutto and P. Teti, “Lithium: a structured parallel programming enviroment in Java,” in Proc. of iccs: intl. conference on computational science, 2002, pp. 844-853.
[BibTeX] [URL]

@inproceedings{DaTe02ICCS,
Author = {Marco Danelutto and Paolo Teti},
Booktitle = {Proc. of ICCS: Intl. Conference on Computational Science},
Month = apr,
Pages = {844--853},
Publisher = {Springer},
Series = {LNCS},
Title = {Lithium: A structured Parallel Programming Enviroment in {Java}},
Url = {http://www.di.unipi.it/~marcod},
Volume = {2330},
Year = {2002},
Bdsk-Url-1 = {http://www.di.unipi.it/~marcod}}

M. Danelutto and D. Ratti, “Skeletons in MPI,” in Proc. of intl. conference on parallel and distributed computing systems (pdcs), Cambridge, USA, 2002, pp. 387-392.
[BibTeX]

@inproceedings{DBLP:conf/pdcs/DaneluttoR02,
Address = {Cambridge, USA},
Author = {Marco Danelutto and D. Ratti},
Bibsource = {DBLP, http://dblp.uni-trier.de},
Booktitle = {Proc. of Intl. Conference on Parallel and Distributed Computing Systems (PDCS)},
Editor = {Selim G. Akl and Teofilo F. Gonzalez},
Month = nov,
Pages = {387--392},
Publisher = {IASTED/ACTA Press},
Title = {Skeletons in {MPI}},
Year = {2002}}

M. Danelutto, “On skeletons and design patterns,” in Parallel computing: advances and current issues (proc. of intl. ParCo 2001), Naples, Italy, 2002, pp. 425-432.
[BibTeX]

@inproceedings{marcod:parco:01,
Address = {Naples, Italy},
Author = {Marco Danelutto},
Booktitle = {Parallel Computing: Advances and Current Issues (Proc. of Intl. {ParCo} 2001)},
Date-Added = {2008-05-11 18:17:38 +0200},
Date-Modified = {2008-05-11 18:21:45 +0200},
Editor = {G. R. Joubert and A. Murli and F. J. Peters and Marco Vanneschi},
Pages = {425--432},
Publisher = {Imperial College Press},
Title = {On skeletons and design patterns},
Year = {2002}}

2001

M. Danelutto, “Efficient support for skeletons on workstation clusters,” Parallel processing letters, vol. 11, iss. 1, pp. 41-56, 2001.
[BibTeX] [URL]

@article{Da01PPL,
Author = {Marco Danelutto},
Journal = {Parallel Processing Letters},
Number = {1},
Pages = {41--56},
Title = {Efficient support for skeletons on workstation clusters},
Url = {http://www.di.unipi.it/~marcod},
Volume = {11},
Year = {2001},
Bdsk-Url-1 = {http://www.di.unipi.it/~marcod}}

M. Danelutto and A. Rampini, “Fast short messages on a linux cluster,” in Proc. of the hpcn: high performance computing and networking, 2001, pp. 393-402.
[BibTeX] [URL]

@inproceedings{DaRa01HPCN,
Author = {Marco Danelutto and Andrea Rampini},
Booktitle = {Proc. of the HPCN: High Performance Computing and Networking},
Pages = {393--402},
Publisher = {Springer},
Series = {LNCS},
Title = {Fast short messages on a {Linux} cluster},
Url = {http://www.di.unipi.it/~marcod},
Volume = {2110},
Year = {2001},
Bdsk-Url-1 = {http://www.di.unipi.it/~marcod}}

2000

M. Danelutto, “Task farm computations in java,” in Proc. of the 8th intl. conference on high-performance computing and networking (hpcn europe 2000), Amsterdam, The Netherlands, 2000, pp. 385-394.
[BibTeX] [URL]

@inproceedings{DBLP:conf/hpcn/Danelutto00,
Address = {Amsterdam, The Netherlands},
Author = {Marco Danelutto},
Bibsource = {DBLP, http://dblp.uni-trier.de},
Booktitle = {Proc. of the 8th Intl. Conference on High-Performance Computing and Networking (HPCN Europe 2000)},
Editor = {Marian Bubak and Hamideh Afsarmanesh and Roy Williams and Louis O. Hertzberger},
Isbn = {3-540-67553-1},
Month = may,
Pages = {385--394},
Publisher = {Springer},
Series = {LNCS},
Title = {Task Farm Computations in {Java}},
Url = {http://link.springer.de/link/service/series/0558/bibs/1823/18230385.htm},
Volume = {1823},
Year = {2000},
Bdsk-Url-1 = {http://link.springer.de/link/service/series/0558/bibs/1823/18230385.htm}}

M. Danelutto and G. Pucci, “A compact, thread-safe communication library for efficient cluster computing,” in Proc. of the 8th intl. conference on high-performance computing and networking (hpcn europe 2000), Amsterdam, The Netherlands, 2000, pp. 407-416.
[BibTeX] [URL]

@inproceedings{DBLP:conf/hpcn/DaneluttoP00,
Address = {Amsterdam, The Netherlands},
Author = {Marco Danelutto and Geppino Pucci},
Bibsource = {DBLP, http://dblp.uni-trier.de},
Booktitle = {Proc. of the 8th Intl. Conference on High-Performance Computing and Networking (HPCN Europe 2000)},
Editor = {Marian Bubak and Hamideh Afsarmanesh and Roy Williams and Louis O. Hertzberger},
Isbn = {3-540-67553-1},
Month = may,
Pages = {407--416},
Publisher = {Springer},
Series = {LNCS},
Title = {A Compact, Thread-Safe Communication Library for Efficient Cluster Computing},
Url = {http://link.springer.de/link/service/series/0558/bibs/1823/18230407.htm},
Volume = {1823},
Year = {2000},
Bdsk-Url-1 = {http://link.springer.de/link/service/series/0558/bibs/1823/18230407.htm}}

M. Danelutto and M. Stigliani, “SKElib: parallel programming with skeletons in C,” in Proc. of 6th intl. euro-par 2000 parallel processing, Munich, Germany, 2000, pp. 1175-1184.
[BibTeX]

@inproceedings{stigliani:europar:00,
Address = {Munich, Germany},
Author = {Marco Danelutto and Massimiliano Stigliani},
Booktitle = {Proc. of 6th Intl. Euro-Par 2000 Parallel Processing},
Editor = {A. Bode and T. Ludwig and W. Karl and R. Wism{\"u}ller},
Month = aug,
Pages = {1175--1184},
Publisher = {Springer},
Series = {LNCS},
Title = {{SKElib}: parallel programming with skeletons in {C}},
Volume = {1900},
Year = {2000}}

1999

M. Aldinucci and M. Danelutto, “Stream parallel skeleton optimization,” in Proc. of pdcs: intl. conference on parallel and distributed computing and systems, Cambridge, Massachusetts, USA, 1999, pp. 955-962.
[Abstract] [BibTeX] [Download PDF]

@inproceedings{pdcs:nf:99,
Abstract = {We discuss the properties of the composition of stream parallel
skeletons such as pipelines and farms. By looking at the ideal
performance figures assumed to hold for these skeletons, we show
that any stream parallel skeleton composition can always be
rewritten into an equivalent "normal form" skeleton composition,
delivering a service time which is equal or even better to the
service time of the original skeleton composition, and achieving a
better utilization of the processors used. The normal form is
defined as a single farm built around a sequential worker code.
Experimental results are discussed that validate this normal form.},
Address = {Cambridge, Massachusetts, USA},
Author = {Marco Aldinucci and Marco Danelutto},
Booktitle = {Proc. of PDCS: Intl. Conference on Parallel and Distributed Computing and Systems},
Date-Modified = {2007-09-16 18:40:51 +0200},
Month = nov,
Organization = {IASTED},
Pages = {955--962},
Publisher = {ACTA Press},
Title = {Stream parallel skeleton optimization},
pdf = {http://calvados.di.unipi.it/storage/paper_files/1999_NF_pdcs.pdf},
Year = {1999},
Bdsk-Url-1 = {http://calvados.di.unipi.it/storage/paper_files/1999_NF_pdcs.pdf}}

B. Bacci, M. Danelutto, S. Pelagatti, and M. Vanneschi, “SkIE: a heterogeneous environment for HPC applications,” Parallel computing, vol. 25, iss. 13-14, pp. 1827-1852, 1999.
[BibTeX]

@article{skie:PC:1999,
Author = {Bruno Bacci and Marco Danelutto and Susanna Pelagatti and Marco Vanneschi},
Bibsource = {DBLP, http://dblp.uni-trier.de},
Journal = {Parallel Computing},
Number = {13--14},
Pages = {1827--1852},
Title = {{SkIE}: A heterogeneous environment for {HPC} applications},
Volume = {25},
Year = {1999}}

M. Danelutto, “Dynamic run time support for skeletons,” in Proc. of intl. parco 99: parallel computing, E. H. D’Hollander, G. R. Joubert, F. J. Peters, and H. J. Sips, Eds., Imperial College Press, 1999, pp. 460-467.
[BibTeX]

@incollection{MDF:parco:99,
Author = {Marco Danelutto},
Booktitle = {Proc. of Intl. PARCO 99: Parallel Computing},
Editor = {E. H. D'Hollander and G. R. Joubert and F. J. Peters and H. J. Sips},
Pages = {460--467},
Publisher = {Imperial College Press},
Series = {Parallel Computing Fundamentals \& Applications},
Title = {Dynamic Run Time Support for Skeletons},
Year = {1999}}

1998

M. Aldinucci, M. Coppola, and M. Danelutto, “Rewriting skeleton programs: how to evaluate the data-parallel stream-parallel tradeoff,” in Proc. of cmpp: intl. workshop on constructive methods for parallel programming, 1998, pp. 44-58.
[Abstract] [BibTeX] [Download PDF]

@inproceedings{aldinuc:stream-data:98,
Abstract = {Some skeleton based parallel programming models allow the programmer to
use both data and stream parallel skeletons within the same program.
It is known that particular skeleton nestings can be formally
rewritten into different nestings that preserve the functional
semantics. Indeed, the kind and possibly the amount of parallelism
usefully exploitable may change while rewriting takes place.
Here we discuss an original framework allowing the user (and/or the
compiling tools) of a skeleton based parallel programming language to
evaluate whether or not the transformation of a skeleton program
is worthwhile in terms of the final program performance. We address,
in particular, the evaluation of transformations exchanging data
parallel and stream parallel skeleton subtrees.},
Author = {Marco Aldinucci and Massimo Coppola and Marco Danelutto},
Booktitle = {Proc. of CMPP: Intl. Workshop on Constructive Methods for Parallel Programming},
Date-Modified = {2007-09-16 18:40:40 +0200},
Editor = {S. Gorlatch},
Month = may,
Optnumber = {MIP-9805},
Optseries = {University of Passau technical report},
Organization = {Fakult{\"a}t f{\"u}r Mathematik und Informatik},
Pages = {44--58},
Publisher = {Uni. Passau, Germany},
Title = {Rewriting skeleton programs: How to evaluate the data-parallel stream-parallel tradeoff},
pdf = {http://calvados.di.unipi.it/storage/paper_files/1998_transf_cmpp.pdf},
Year = {1998},
Bdsk-Url-1 = {http://calvados.di.unipi.it/storage/paper_files/1998_transf_cmpp.pdf}}

M. Danelutto, R. Di Cosmo, X. Leroy, and S. Pelagatti, “Parallel functional programming with skeletons: the OCAMLP3L experiment,” in ACM SIGPLAN workshop on ML, 1998, pp. 31-39.
[BibTeX]

@inproceedings{ocamlp3l:98,
Author = {Marco Danelutto and Di Cosmo, Roberto and Xavier Leroy and Susanna Pelagatti},
Booktitle = {{ACM} {SIGPLAN} Workshop on {ML}},
Date-Added = {2008-05-05 23:25:08 +0200},
Date-Modified = {2008-05-05 23:27:16 +0200},
Organization = {ACM},
Pages = {31--39},
Title = {Parallel Functional Programming with Skeletons: the {OCAMLP3L} experiment},
Year = {1998}}

D. B. Skillicorn, M. Danelutto, S. Pelagatti, and A. Zavanella, “Optimizing data-parallel programs using the BSP cost model,” in Proc. of 4th intl. euro-par ’98 parallel processing, Southampton, UK, 1998, pp. 698-703.
[BibTeX] [URL]

@inproceedings{DBLP:conf/europar/SkillicornDPZ98,
Address = {Southampton, UK},
Author = {David B. Skillicorn and Marco Danelutto and Susanna Pelagatti and Andrea Zavanella},
Bibsource = {DBLP, http://dblp.uni-trier.de},
Booktitle = {Proc. of 4th Intl. Euro-Par '98 Parallel Processing},
Date-Modified = {2007-11-03 14:38:19 +0100},
Editor = {David J. Pritchard and Jeff Reeve},
Pages = {698--703},
Publisher = {Springer},
Series = {LNCS},
Title = {Optimizing Data-Parallel Programs Using the {BSP} Cost Model},
Url = {http://link.springer.de/link/service/series/0558/bibs/1470/14700698.htm},
Volume = {1470},
Year = {1998},
Bdsk-Url-1 = {http://link.springer.de/link/service/series/0558/bibs/1470/14700698.htm}}

1997

B. Bacci, B. Cantalupo, M. Danelutto, S. Orlando, D. Pasetto, S. Pelagatti, and M. Vanneschi, “An environment for structured parallel programming,” in Advances in high performance computing, 1997, pp. 219-252.
[BibTeX]

@inproceedings{bcd:nato-asi:97,
Annote = {Volume 3},
Author = {Bruno Bacci and Barbara Cantalupo and Marco Danelutto and Salvatore Orlando and Davide Pasetto and Susanna Pelagatti and Marco Vanneschi},
Booktitle = {Advances in High Performance Computing},
Date-Added = {2008-05-05 23:20:29 +0200},
Date-Modified = {2008-09-14 14:12:25 +0200},
Number = {30},
Pages = {219--252},
Publisher = {Kluwer},
Series = {NATO-ASI},
Title = {An Environment for structured parallel programming},
Year = {1997}}

S. Ciarpaglini, M. Danelutto, L. Folchi, C. Manconi, and S. Pelagatti, “ANACLETO: a template-based P3L compiler,” in Proc. of the parallel computing workshop (pcw’97), 1997.
[BibTeX]

@inproceedings{anacleto-australia,
Author = {Silvia Ciarpaglini and Marco Danelutto and Laura Folchi and Carlo Manconi and Susanna Pelagatti},
Booktitle = {Proc. of the Parallel Computing Workshop (PCW'97)},
Date-Modified = {2007-10-08 15:44:39 +0200},
Note = {Canberra, Australia},
Title = {{ANACLETO}: a template-based {P3L} compiler},
Year = {1997}}

M. Danelutto, F. Pasqualetti, and S. Pelagatti, “Skeletons for data parallelism in P3L,” in Proc. of 3rd intl. euro-par ’97 parallel processing, Passau, Germany, 1997, pp. 619-628.
[BibTeX]

@inproceedings{DBLP:conf/europar/DaneluttoPP97,
Address = {Passau, Germany},
Author = {Marco Danelutto and Fabrizio Pasqualetti and Susanna Pelagatti},
Bibsource = {DBLP, http://dblp.uni-trier.de},
Booktitle = {Proc. of 3rd Intl. Euro-Par '97 Parallel Processing},
Date-Modified = {2007-11-03 14:33:14 +0100},
Editor = {Christian Lengauer and Martin Griebl and Sergei Gorlatch},
Isbn = {3-540-63440-1},
Pages = {619--628},
Publisher = {Springer},
Series = {LNCS},
Title = {Skeletons for Data Parallelism in {P3L}},
Volume = {1300},
Year = {1997}}

1996

A. Ceccolini, M. Danelutto, G. Orsini, and S. Pelagatti, “A tool for the development of structured parallel applications,” in Proc. of the 4th intl. conference on high-performance computing and networking (hpcn europe 1996), 1996, pp. 485-492.
[BibTeX]

@inproceedings{DBLP:conf/hpcn/CeccoliniDOP96,
Address = {Brussels, Belgium},
Author = {A. Ceccolini and Marco Danelutto and G. Orsini and Susanna Pelagatti},
Bibsource = {DBLP, http://dblp.uni-trier.de},
Booktitle = {Proc. of the 4th Intl. Conference on High-Performance Computing and Networking (HPCN Europe 1996)},
Editor = {Heather M. Liddell and Adrian Colbrook and Louis O. Hertzberger and Peter M. A. Sloot},
Isbn = {3-540-61142-8},
Month = apr,
Pages = {485--492},
Publisher = {Springer},
Series = {LNCS},
Title = {A Tool for the Development of Structured Parallel Applications},
Volume = {1067},
Year = {1996}}

M. Danelutto, S. Pelagatti, R. Ravazzolo, and A. Riaudo, “Parallel OCR in P3L: a case study,” in Proc. of the 4th intl. conference on high-performance computing and networking (hpcn europe 1996), 1996, pp. 1017-1019.
[BibTeX]

@inproceedings{DBLP:conf/hpcn/DaneluttoPRR96,
Address = {Brussels, Belgium},
Author = {Marco Danelutto and Susanna Pelagatti and Roberto Ravazzolo and A. Riaudo},
Bibsource = {DBLP, http://dblp.uni-trier.de},
Booktitle = {Proc. of the 4th Intl. Conference on High-Performance Computing and Networking (HPCN Europe 1996)},
Editor = {Heather M. Liddell and Adrian Colbrook and Louis O. Hertzberger and Peter M. A. Sloot},
Isbn = {3-540-61142-8},
Month = apr,
Pages = {1017--1019},
Publisher = {Springer},
Series = {LNCS},
Title = {Parallel {OCR} in {P3L}: A Case Study},
Volume = {1067},
Year = {1996}}

M. Danelutto, G. Di Caprio, and A. Masini, “Parallelizing a model checker,” in Proc. of the intl. conference on parallel and distributed processing techniques and applications (pdpta 1996), Sunnyvale, CA, USA, 1996, pp. 1118-1128.
[BibTeX]

@inproceedings{DBLP:conf/pdpta/DaneluttoCM96,
Address = {Sunnyvale, CA, USA},
Author = {Marco Danelutto and Di Caprio, G. and A. Masini},
Bibsource = {DBLP, http://dblp.uni-trier.de},
Booktitle = {Proc. of the Intl. Conference on Parallel and Distributed Processing Techniques and Applications (PDPTA 1996)},
Editor = {Hamid R. Arabnia},
Isbn = {0-9648666-4-1},
Month = aug,
Pages = {1118--1128},
Publisher = {CSREA Press},
Title = {Parallelizing A Model Checker},
Year = {1996}}

1995

B. Bacci, M. Danelutto, S. Pelagatti, M. Vanneschi, and S. Orlando, “Summarising an experiment in parallel programming language design,” in High-performance computing and networking, international conference and exhibition, hpcn europe 1995, proceedings, Milan, Italy, 1995, pp. 7-13.
[BibTeX]

@inproceedings{DBLP:conf/hpcn/BacciDPVO95,
Address = {Milan, Italy},
Author = {Bruno Bacci and Marco Danelutto and Susanna Pelagatti and Marco Vanneschi and Salvatore Orlando},
Bibsource = {DBLP, http://dblp.uni-trier.de},
Booktitle = {High-Performance Computing and Networking, International Conference and Exhibition, HPCN Europe 1995, Proceedings},
Editor = {Louis O. Hertzberger and Giuseppe Serazzi},
Isbn = {3-540-59393-4},
Month = may,
Pages = {7--13},
Publisher = {Springer},
Series = {LNCS},
Title = {Summarising an experiment in parallel programming language design},
Volume = {919},
Year = {1995}}

B. Bacci, M. Danelutto, S. Orlando, S. Pelagatti, and M. Vanneschi, “P3L: a structured high level programming language and its structured support,” Concurrency: practice and experience, vol. 7, iss. 3, pp. 225-255, 1995. doi:10.1002/cpe.4330070305
[BibTeX]

@article{orlando-grosso,
Author = {Bruno Bacci and Marco Danelutto and Salvatore Orlando and Susanna Pelagatti and Marco Vanneschi},
Date-Modified = {2010-05-18 01:36:00 +0200},
Doi = {10.1002/cpe.4330070305},
Journal = {Concurrency: Practice and Experience},
Month = may,
Number = {3},
Pages = {225--255},
Title = {{P3L}: A Structured High level programming language and its structured support},
Volume = {7},
Year = {1995},
Bdsk-Url-1 = {http://dx.doi.org/10.1002/cpe.4330070305}}

1992

M. Danelutto, R. Di Meglio, S. Orlando, S. Pelagatti, and M. Vanneschi, “A methodology for the development and the support of massively parallel programs,” Future generation computer systems, vol. 8, iss. 1-3, pp. 205-220, 1992. doi:10.1016/0167-739X(92)90040-I
[BibTeX]

@article{p3l:hp:92,
Address = {Amsterdam, The Netherlands},
Author = {Marco Danelutto and Di Meglio, Roberto and Salvatore Orlando and Susanna Pelagatti and Marco Vanneschi},
Doi = {10.1016/0167-739X(92)90040-I},
Issn = {0167-739X},
Journal = {Future Generation Computer Systems},
Number = {1--3},
Pages = {205--220},
Publisher = {Elsevier},
Title = {A methodology for the development and the support of massively parallel programs},
Volume = {8},
Year = {1992},
Bdsk-Url-1 = {http://dx.doi.org/10.1016/0167-739X(92)90040-I}}