Journals


  1. [1] F. Tambon, F. Khomh, and G. Antoniol, “GIST: Generated Inputs Sets Transferability in Deep Learning,” ACM Trans. Softw. Eng. Methodol., Jun. 2024, doi: 10.1145/3672457.
    Bibtex
      @article{10.1145/3672457,
      author = {Tambon, Florian and Khomh, Foutse and Antoniol, Giuliano},
      title = {GIST: Generated Inputs Sets Transferability in Deep Learning},
      year = {2024},
      publisher = {Association for Computing Machinery},
      address = {New York, NY, USA},
      issn = {1049-331X},
      url = {https://doi.org/10.1145/3672457},
      doi = {10.1145/3672457},
      journal = {ACM Trans. Softw. Eng. Methodol.},
      month = jun,
      keywords = {test sets generation, deep learning, DNN, testing, transferability}
    }
    
      
  2. [2] F. Tambon, A. Nikanjam, L. An, F. Khomh, and G. Antoniol, “Silent bugs in deep learning frameworks: an empirical study of Keras and TensorFlow,” Empir. Softw. Eng., vol. 29, no. 1, p. 10, 2024, doi: 10.1007/s10664-023-10389-6.
    Bibtex
      @article{DBLP:journals/ese/TambonNAKA24,
      author = {Tambon, Florian and Nikanjam, Amin and An, Le and Khomh, Foutse and Antoniol, Giuliano},
      title = {Silent bugs in deep learning frameworks: an empirical study of Keras and TensorFlow},
      journal = {Empir. Softw. Eng.},
      volume = {29},
      number = {1},
      pages = {10},
      year = {2024},
      doi = {10.1007/S10664-023-10389-6},
      timestamp = {Sun, 31 Dec 2023 19:06:36 +0100},
      biburl = {https://dblp.org/rec/journals/ese/TambonNAKA24.bib},
      bibsource = {dblp computer science bibliography, https://dblp.org}
    }
    
      
  3. [3] M. S. Rahman, F. Khomh, A. Hamidi, J. Cheng, G. Antoniol, and H. Washizaki, “Machine learning application development: practitioners’ insights,” Softw. Qual. J., vol. 31, no. 4, pp. 1065–1119, 2023, doi: 10.1007/s11219-023-09621-9.
    Bibtex
      @article{DBLP:journals/sqj/RahmanKHCAW23,
      author = {Rahman, Md. Saidur and Khomh, Foutse and Hamidi, Alaleh and Cheng, Jinghui and Antoniol, Giuliano and Washizaki, Hironori},
      title = {Machine learning application development: practitioners' insights},
      journal = {Softw. Qual. J.},
      volume = {31},
      number = {4},
      pages = {1065--1119},
      year = {2023},
      url = {https://doi.org/10.1007/s11219-023-09621-9},
      doi = {10.1007/S11219-023-09621-9},
      timestamp = {Tue, 28 Nov 2023 20:05:42 +0100},
      biburl = {https://dblp.org/rec/journals/sqj/RahmanKHCAW23.bib},
      bibsource = {dblp computer science bibliography, https://dblp.org}
    }
    
      
  4. [4] B. A. Muse, F. Khomh, and G. Antoniol, “Refactoring practices in the context of data-intensive systems,” Empir. Softw. Eng., vol. 28, no. 2, p. 46, 2023, doi: 10.1007/s10664-022-10271-x.
    Bibtex
      @article{DBLP:journals/ese/MuseKA23,
      author = {Muse, Biruk Asmare and Khomh, Foutse and Antoniol, Giuliano},
      title = {Refactoring practices in the context of data-intensive systems},
      journal = {Empir. Softw. Eng.},
      volume = {28},
      number = {2},
      pages = {46},
      year = {2023},
      url = {https://doi.org/10.1007/s10664-022-10271-x},
      doi = {10.1007/S10664-022-10271-X},
      timestamp = {Sat, 25 Feb 2023 21:34:58 +0100},
      biburl = {https://dblp.org/rec/journals/ese/MuseKA23.bib},
      bibsource = {dblp computer science bibliography, https://dblp.org}
    }
    
      
  5. [5] F. Tambon, F. Khomh, and G. Antoniol, “A probabilistic framework for mutation testing in deep neural networks,” Inf. Softw. Technol., vol. 155, p. 107129, 2023, doi: 10.1016/j.infsof.2022.107129.
    Bibtex
      @article{DBLP:journals/infsof/TambonKA23,
      author = {Tambon, Florian and Khomh, Foutse and Antoniol, Giuliano},
      title = {A probabilistic framework for mutation testing in deep neural networks},
      journal = {Inf. Softw. Technol.},
      volume = {155},
      pages = {107129},
      year = {2023},
      url = {https://doi.org/10.1016/j.infsof.2022.107129},
      doi = {10.1016/J.INFSOF.2022.107129},
      timestamp = {Sun, 15 Jan 2023 18:31:36 +0100},
      biburl = {https://dblp.org/rec/journals/infsof/TambonKA23.bib},
      bibsource = {dblp computer science bibliography, https://dblp.org}
    }
    
      
  6. [6] D. Humeniuk, F. Khomh, and G. Antoniol, “AmbieGen: A search-based framework for autonomous systems testing,” Sci. Comput. Program., vol. 230, p. 102990, 2023, doi: 10.1016/j.scico.2023.102990.
    Bibtex
      @article{DBLP:journals/scp/HumeniukKA23,
      author = {Humeniuk, Dmytro and Khomh, Foutse and Antoniol, Giuliano},
      title = {AmbieGen: {A} search-based framework for autonomous systems testing},
      journal = {Sci. Comput. Program.},
      volume = {230},
      pages = {102990},
      year = {2023},
      url = {https://doi.org/10.1016/j.scico.2023.102990},
      doi = {10.1016/J.SCICO.2023.102990},
      timestamp = {Fri, 27 Oct 2023 20:40:12 +0200},
      biburl = {https://dblp.org/rec/journals/scp/HumeniukKA23.bib},
      bibsource = {dblp computer science bibliography, https://dblp.org}
    }
    
      
  7. [7] F. Tambon et al., “How to certify machine learning based safety-critical systems? A systematic literature review,” Automated Software Engineering, vol. 29, Nov. 2022, doi: 10.1007/s10515-022-00337-x.
    Bibtex
      @article{Tambon2022certify,
      author = {Tambon, Florian and Laberge, Gabriel and An, Le and Nikanjam, Amin and Mindom, Paulina and Pequignot, Yann and Khomh, Foutse and Antoniol, Giulio and Merlo, Ettore and Laviolette, Francois},
      year = {2022},
      month = nov,
      title = {How to certify machine learning based safety-critical systems? A systematic literature review},
      volume = {29},
      journal = {Automated Software Engineering},
      doi = {10.1007/s10515-022-00337-x}
    }
    
      
  8. [8] F. Zampetti, S. Mudbhari, V. Arnaoudova, M. D. Penta, S. Panichella, and G. Antoniol, “Using code reviews to automatically configure static analysis tools,” Empir. Softw. Eng., vol. 27, no. 1, p. 28, 2022, doi: 10.1007/s10664-021-10076-4.
    Bibtex
      @article{DBLP:journals/ese/ZampettiMAPPA22,
      author = {Zampetti, Fiorella and Mudbhari, Saghan and Arnaoudova, Venera and Penta, Massimiliano Di and Panichella, Sebastiano and Antoniol, Giuliano},
      title = {Using code reviews to automatically configure static analysis tools},
      journal = {Empir. Softw. Eng.},
      volume = {27},
      number = {1},
      pages = {28},
      year = {2022},
      url = {https://doi.org/10.1007/s10664-021-10076-4},
      doi = {10.1007/s10664-021-10076-4},
      timestamp = {Sat, 25 Dec 2021 15:51:52 +0100},
      biburl = {https://dblp.org/rec/journals/ese/ZampettiMAPPA22.bib},
      bibsource = {dblp computer science bibliography, https://dblp.org}
    }
    
      
  9. [9] B. Asmare-Muse, C. Nagy, A. Cleve, F. Khomh, and G. Antoniol, “FIXME: Synchronize with Database - An Empirical Study of Data Access Self-Admitted Technical Debt,” Empirical Software Engineering, 2022.
    Bibtex
      @article{ed10dadd69224e058a193608d779f0b2,
      title = {FIXME: Synchronize with Database - An Empirical Study of Data Access Self-Admitted Technical Debt},
      author = {Asmare-Muse, Biruk and Nagy, Csaba and Cleve, Anthony and Khomh, Foutse and Antoniol, Giuliano},
      year = {2022},
      language = {English},
      journal = {Empirical Software Engineering},
      issn = {1382-3256},
      publisher = {Springer}
    }
    
      
  10. [10] C. Coviello, S. Romano, G. Scanniello, and G. Antoniol, “GASSER: A Multi-Objective Evolutionary Approach for Test Suite Reduction,” Int. J. Softw. Eng. Knowl. Eng., vol. 32, no. 2, pp. 193–225, 2022, doi: 10.1142/S0218194022500085.
    Bibtex
      @article{DBLP:journals/ijseke/CovielloRSA22,
      author = {Coviello, Carmen and Romano, Simone and Scanniello, Giuseppe and Antoniol, Giuliano},
      title = {GASSER: A Multi-Objective Evolutionary Approach for Test Suite Reduction},
      journal = {Int. J. Softw. Eng. Knowl. Eng.},
      volume = {32},
      number = {2},
      pages = {193--225},
      year = {2022},
      url = {https://doi.org/10.1142/S0218194022500085},
      doi = {10.1142/S0218194022500085},
      timestamp = {Wed, 07 Dec 2022 23:05:18 +0100},
      biburl = {https://dblp.org/rec/journals/ijseke/CovielloRSA22.bib},
      bibsource = {dblp computer science bibliography, https://dblp.org}
    }
    
      
  11. [11] D. Humeniuk, F. Khomh, and G. Antoniol, “A search-based framework for automatic generation of testing environments for cyber-physical systems,” Inf. Softw. Technol., vol. 149, p. 106936, 2022, doi: 10.1016/j.infsof.2022.106936.
    Bibtex
      @article{DBLP:journals/infsof/HumeniukKA22,
      author = {Humeniuk, Dmytro and Khomh, Foutse and Antoniol, Giuliano},
      title = {A search-based framework for automatic generation of testing environments for cyber-physical systems},
      journal = {Inf. Softw. Technol.},
      volume = {149},
      pages = {106936},
      year = {2022},
      url = {https://doi.org/10.1016/j.infsof.2022.106936},
      doi = {10.1016/J.INFSOF.2022.106936},
      timestamp = {Tue, 16 Aug 2022 23:05:36 +0200},
      biburl = {https://dblp.org/rec/journals/infsof/HumeniukKA22.bib},
      bibsource = {dblp computer science bibliography, https://dblp.org}
    }
    
      
  12. [12] L. Ardito, M. Torchiano, R. Coppola, and G. Antoniol, “PowTrAn: An R Package for power trace analysis,” SoftwareX, vol. 12, p. 100512, 2020, doi: 10.1016/j.softx.2020.100512.
    Bibtex
      @article{DBLP:journals/softx/ArditoTCA20,
      author = {Ardito, Luca and Torchiano, Marco and Coppola, Riccardo and Antoniol, Giulio},
      title = {PowTrAn: An {R} Package for power trace analysis},
      journal = {SoftwareX},
      volume = {12},
      pages = {100512},
      year = {2020},
      url = {https://doi.org/10.1016/j.softx.2020.100512},
      doi = {10.1016/j.softx.2020.100512},
      timestamp = {Mon, 03 Jan 2022 22:05:31 +0100},
      biburl = {https://dblp.org/rec/journals/softx/ArditoTCA20.bib},
      bibsource = {dblp computer science bibliography, https://dblp.org}
    }
    
      
  13. [13] R. Morales, F. Khomh, and G. Antoniol, “RePOR: Mimicking Humans on Refactoring Tasks. Are We There Yet?,” Empirical Software Engineering (EMSE), 2020.
    Bibtex
      @article{DBLP:journals/emse/Morales2020,
      author = {Morales, Rodrigo and Khomh, Foutse and Antoniol, Giuliano},
      title = {RePOR: Mimicking Humans on Refactoring Tasks. Are We There Yet?},
      journal = {Empirical Software Engineering (EMSE)},
      year = {2020}
    }
    
      
  14. [14] C. Coviello, S. Romano, G. Scanniello, A. Marchetto, A. Corazza, and G. Antoniol, “Adequate vs. inadequate test suite reduction approaches,” Information & Software Technology, vol. 119, 2020, doi: 10.1016/j.infsof.2019.106224.
    Bibtex
      @article{DBLP:journals/infsof/CovielloRSMCA20,
      author = {Coviello, Carmen and Romano, Simone and Scanniello, Giuseppe and Marchetto, Alessandro and Corazza, Anna and Antoniol, Giuliano},
      title = {Adequate vs. inadequate test suite reduction approaches},
      journal = {Information {\&} Software Technology},
      volume = {119},
      year = {2020},
      url = {https://doi.org/10.1016/j.infsof.2019.106224},
      doi = {10.1016/j.infsof.2019.106224},
      timestamp = {Thu, 06 Feb 2020 18:12:34 +0100},
      biburl = {https://dblp.org/rec/bib/journals/infsof/CovielloRSMCA20},
      bibsource = {dblp computer science bibliography, https://dblp.org}
    }
    
      
  15. [15] G. Antoniol and F. Sarro, “Guest editorial: Special section on Search-based Software Engineering track at GECCO 2018,” Information & Software Technology, vol. 118, 2020, doi: 10.1016/j.infsof.2019.106223.
    Bibtex
      @article{DBLP:journals/infsof/AntoniolS20,
      author = {Antoniol, Giuliano and Sarro, Federica},
      title = {Guest editorial: Special section on Search-based Software Engineering track at {GECCO} 2018},
      journal = {Information {\&} Software Technology},
      volume = {118},
      year = {2020},
      url = {https://doi.org/10.1016/j.infsof.2019.106223},
      doi = {10.1016/j.infsof.2019.106223},
      timestamp = {Tue, 17 Dec 2019 10:02:41 +0100},
      biburl = {https://dblp.org/rec/bib/journals/infsof/AntoniolS20},
      bibsource = {dblp computer science bibliography, https://dblp.org}
    }
    
      
  16. [16] D. Johannes, F. Khomh, and G. Antoniol, “A large-scale empirical study of code smells in JavaScript projects,” Software Quality Journal, vol. 27, no. 3, pp. 1271–1314, 2019, doi: 10.1007/s11219-019-09442-9.
    Bibtex
      @article{DBLP:journals/sqj/JohannesKA19,
      author = {Johannes, David and Khomh, Foutse and Antoniol, Giuliano},
      title = {A large-scale empirical study of code smells in JavaScript projects},
      journal = {Software Quality Journal},
      volume = {27},
      number = {3},
      pages = {1271--1314},
      year = {2019},
      url = {https://doi.org/10.1007/s11219-019-09442-9},
      doi = {10.1007/s11219-019-09442-9},
      timestamp = {Sat, 12 Oct 2019 12:51:21 +0200},
      biburl = {https://dblp.org/rec/bib/journals/sqj/JohannesKA19},
      bibsource = {dblp computer science bibliography, https://dblp.org}
    }
    
      
  17. [17] R. Saborido, R. Morales, F. Khomh, Y.-G. Guéhéneuc, and G. Antoniol, “Getting the most from map data structures in Android,” Empirical Software Engineering, Mar. 2018, doi: 10.1007/s10664-018-9607-8.
    Bibtex
      @article{Saborido2018,
      author = {Saborido, Ruben and Morales, Rodrigo and Khomh, Foutse and Gueheneuc, Yann-Gael and Antoniol, Giuliano},
      title = {Getting the most from map data structures in Android},
      journal = {Empirical Software Engineering},
      year = {2018},
      month = mar,
      issn = {1573-7616},
      doi = {"10.1007/s10664-018-9607-8},
      url = {https://doi.org/10.1007/s10664-018-9607-8}
    }
    
      
    Abstract A map is a data structure that is commonly used to store data as key–value pairs and retrieve data as keys, values, or key–value pairs. Although Java offers different map implementation classes, Android SDK offers other implementations supposed to be more efficient than HashMap: ArrayMap and SparseArray variants (SparseArray, LongSparseArray, SparseIntArray, SparseLongArray, and SparseBooleanArray). Yet, the performance of these implementations in terms of CPU time, memory usage, and energy consumption is lacking in the official Android documentation; although saving CPU, memory, and energy is a major concern of users wanting to increase battery life. Consequently, we study the use of map implementations by Android developers in two ways. First, we perform an observational study of 5713 Android apps in GitHub. Second, we conduct a survey to assess developers’ perspective on Java and Android map implementations. Then, we perform an experimental study comparing HashMap, ArrayMap, and SparseArray variants map implementations in terms of CPU time, memory usage, and energy consumption. We conclude with guidelines for choosing among the map implementations: HashMap is preferable over ArrayMap to improve energy efficiency of apps, and SparseArray variants should be used instead of HashMap and ArrayMap when keys are primitive types.
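    The guideline in this abstract is easiest to see in code. The sketch below is ours, not the paper’s (it assumes an Android build environment providing android.util.SparseArray): HashMap boxes every int key into an Integer, while SparseArray stores primitive keys directly.
      // Hypothetical illustration of the study's guideline, not code from the paper.
      import android.util.SparseArray; // Android SDK class (assumed available)
      import java.util.HashMap;
      import java.util.Map;

      class MapChoiceDemo {
          static void withHashMap() {
              // Each put()/get() auto-boxes the int key into an Integer object:
              // extra allocations and a larger memory footprint.
              Map<Integer, String> byId = new HashMap<>();
              byId.put(42, "answer");
              String v = byId.get(42);
          }

          static void withSparseArray() {
              // SparseArray keeps int keys in a sorted int[] and finds them by
              // binary search: no boxing, smaller footprint for primitive keys.
              SparseArray<String> byId = new SparseArray<>();
              byId.put(42, "answer");
              String v = byId.get(42);
          }
      }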
  18. [18] R. Morales, F. Chicano, F. Khomh, and G. Antoniol, “Exact search-space size for the refactoring scheduling problem,” Autom. Softw. Eng., vol. 25, no. 2, pp. 195–200, 2018, doi: 10.1007/s10515-017-0213-6.
    Bibtex
      @article{DBLP:journals/ase/MoralesCKA18,
      author = {Morales, Rodrigo and Chicano, Francisco and Khomh, Foutse and Antoniol, Giuliano},
      title = {Exact search-space size for the refactoring scheduling problem},
      journal = {Autom. Softw. Eng.},
      volume = {25},
      number = {2},
      pages = {195--200},
      year = {2018},
      url = {https://doi.org/10.1007/s10515-017-0213-6},
      doi = {10.1007/s10515-017-0213-6},
      timestamp = {Tue, 15 May 2018 12:18:05 +0200},
      biburl = {https://dblp.org/rec/bib/journals/ase/MoralesCKA18},
      bibsource = {dblp computer science bibliography, https://dblp.org}
    }
    
      
  19. [19] S. Romano, G. Scanniello, G. Antoniol, and A. Marchetto, “SPIRITuS: a SimPle Information Retrieval regressIon Test Selection approach,” International Journal on Information and Software Technology, to appear, 2018.
    Bibtex
      @article{Spiritus2018,
      author = {Romano, Simone and Scanniello, Giuseppe and Antoniol, Giulio and Marchetto, Alessandro},
      title = {SPIRITuS: a SimPle Information Retrieval regressIon Test Selection approach},
      journal = {International Journal on Information and Software Technology},
      pages = {To-Appear},
      year = {2018}
    }
    
      
  20. [20] R. Morales, F. Chicano, F. Khomh, and G. Antoniol, “Efficient refactoring scheduling based on partial order reduction,” Journal of Systems and Software, vol. 145, pp. 25–51, 2018, doi: 10.1016/j.jss.2018.07.076.
    Bibtex
      @article{DBLP:journals/jss/MoralesCKA18,
      author = {Morales, Rodrigo and Chicano, Francisco and Khomh, Foutse and Antoniol, Giuliano},
      title = {Efficient refactoring scheduling based on partial order reduction},
      journal = {Journal of Systems and Software},
      volume = {145},
      pages = {25--51},
      year = {2018},
      url = {https://doi.org/10.1016/j.jss.2018.07.076},
      doi = {10.1016/j.jss.2018.07.076},
      timestamp = {Fri, 02 Nov 2018 15:38:55 +0100},
      biburl = {https://dblp.org/rec/bib/journals/jss/MoralesCKA18},
      bibsource = {dblp computer science bibliography, https://dblp.org}
    }
    
      
  21. [21] Z. Soh, F. Khomh, Y.-G. Guéhéneuc, and G. Antoniol, “Noise in Mylyn interaction traces and its impact on developers and recommendation systems,” Empirical Software Engineering Journal, vol. 23, pp. 645–692, 2018.
    Bibtex
      @article{Zephiryn1017,
      author = {Soh, Z{\'{e}}phyrin and Khomh, Foutse and Gu{\'{e}}h{\'{e}}neuc, Yann{-}Ga{\"{e}}l and Antoniol, Giuliano},
      title = {Noise in Mylyn interaction traces and its impact on developers and recommendation systems},
      journal = {Empirical Software Engineering Journal},
      volume = {23},
      pages = {645-692},
      year = {2018}
    }
    
      
  22. [22] R. Morales, R. Saborido, F. Khomh, F. Chicano, and G. Antoniol, “EARMO: An Energy-Aware Refactoring Approach for Mobile Apps,” IEEE Transactions on Software Engineering, pp. 1–1, Jan. 2017, doi: 10.1109/TSE.2017.2757486.
    Bibtex
      @article{Morales2018,
      title = {EARMO: An Energy-Aware Refactoring Approach for Mobile Apps},
      journal = {IEEE Transactions on Software Engineering},
      year = {2017},
      month = jan,
      pages = {1 - 1},
      issn = {0098-5589},
      doi = {10.1109/TSE.2017.2757486},
      url = {http://ieeexplore.ieee.org/document/8052533/},
      author = {Morales, Rodrigo and Saborido, Ruben and Khomh, Foutse and Chicano, Francisco and Antoniol, Giuliano}
    }
    
      
  23. [23] R. Morales, Z. Soh, F. Khomh, G. Antoniol, and F. Chicano, “On the use of developers’ context for automatic refactoring of software anti-patterns,” Journal of Systems and Software, vol. 128, pp. 236–251, 2017, doi: 10.1016/j.jss.2016.05.042.
    Bibtex
      @article{MoralesSKAC17,
      author = {Morales, Rodrigo and Soh, Z{\'{e}}phyrin and Khomh, Foutse and Antoniol, Giuliano and Chicano, Francisco},
      title = {On the use of developers' context for automatic refactoring of software anti-patterns},
      journal = {Journal of Systems and Software},
      volume = {128},
      pages = {236--251},
      year = {2017},
      url = {https://doi.org/10.1016/j.jss.2016.05.042},
      doi = {10.1016/j.jss.2016.05.042}
    }
    
      
  24. [24] W. Li, J. Hayes, G. Antoniol, Y. G. Guéhéneuc, and B. Adams, “Error leakage and wasted time: sensitivity and effort analysis of a requirements consistency checking process,” Journal of Software: Evolution and Process, vol. 28, no. 12, pp. 1061–1080, 2016, doi: 10.1002/smr.1819.
    Bibtex
      @article{Wenbin2016,
      author = {Li, Wenbin and Hayes, Jane and Antoniol, Giulio and Gu{\'{e}}h{\'{e}}neuc, Yann Ga{\"{e}}l and Adams, Bram},
      title = {Error leakage and wasted time: sensitivity and effort analysis of a requirements consistency checking process},
      journal = {Journal of Software: Evolution and Process},
      volume = {28},
      number = {12},
      issn = {2047-7481},
      ee = {http://dx.doi.org/10.1002/smr.1819},
      doi = {10.1002/smr.1819},
      pages = {1061--1080},
      year = {2016}
    }
    
      
    Abstract Several techniques are used by requirements engineering practitioners to address difficult problems such as specifying precise requirements while using inherently ambiguous natural language text and ensuring the consistency of requirements. Often, these problems are addressed by building processes/tools that combine multiple techniques where the output from 1 technique becomes the input to the next. While powerful, these techniques are not without problems. Inherent errors in each technique may leak into the subsequent step of the process. We model and study 1 such process, for checking the consistency of temporal requirements, and assess error leakage and wasted time. We perform an analysis of the input factors of our model to determine the effect that sources of uncertainty may have on the final accuracy of the consistency checking process. Convinced that error leakage exists and negatively impacts the results of the overall consistency checking process, we perform a second simulation to assess its impact on the analysts’ efforts to check requirements consistency. We show that analyst’s effort varies depending on the precision and recall of the subprocesses and that the number and capability of analysts affect their effort. We share insights gained and discuss applicability to other processes built of piped techniques.
  25. [25] A. Sabane, Y. G. Guéhéneuc, V. Arnaoudova, and G. Antoniol, “Fragile Base-class Problem, Problem?,” Empirical Software Engineering, pp. 1–46, 2016, doi: 10.1007/s10664-016-9448-2.
    Bibtex
      @article{Aminta2016ense,
      author = {Sabane, Aminata and Gu{\'{e}}h{\'{e}}neuc, Yann Ga{\"{e}}l and Arnaoudova, Venera and Antoniol, Giuliano},
      title = {Fragile Base-class Problem, Problem?},
      journal = {Empirical Software Engineering},
      pages = {1--46},
      year = {2016},
      doi = {10.1007/s10664-016-9448-2},
      ee = {http://dx.doi.org/10.1007/s10664-016-9448-2}
    }
    
      
    Abstract The fragile base-class problem (FBCP) has been described in the literature as a consequence of “misusing” inheritance and composition in object-oriented programming when (re)using frameworks. Many research works have focused on preventing the FBCP by proposing alternative mechanisms for reuse, but, to the best of our knowledge, there is no previous research work studying the prevalence and impact of the FBCP in real-world software systems. The goal of our work is thus threefold: (1) assess, in different systems, the prevalence of micro-architectures, called FBCS, that could lead to two aspects of the FBCP, (2) investigate the relation between the detected occurrences and the quality of the systems in terms of change and fault proneness, and (3) assess whether there exist bugs in these systems that are related to the FBCP. We therefore perform a quantitative and a qualitative study. Quantitatively, we analyse multiple versions of seven different open-source systems that use 58 different frameworks, resulting in 301 configurations. We detect in these systems 112,263 FBCS occurrences and we analyse whether classes playing the role of sub-classes in FBCS occurrences are more change and/or fault prone than other classes. Results show that classes participating in the analysed FBCS are neither more likely to change nor more likely to have faults. Qualitatively, we conduct a survey to confirm/infirm that some bugs are related to the FBCP. The survey involves 41 participants that analyse a total of 104 bugs of three open-source systems. Results indicate that none of the analysed bugs is related to the FBCP. Thus, despite large, rigorous quantitative and qualitative studies, we must conclude that the two aspects of the FBCP that we analyse may not be as problematic in terms of change and fault-proneness as previously thought in the literature. We propose reasons why the FBCP may not be so prevalent in the analysed systems and in other systems in general.
  26. [26] L. Guerrouj et al., “An Empirical Study on the Impact of Lexical Smells on Change- and Fault-Proneness,” Software Quality Journal, 2016.
    Bibtex
      @article{latifa2016,
      author = {Guerrouj, Latifa and Kermansaravi, Zeinab and Arnaoudova, Venera and Fung, Benjamin and Khomh, Foutse and Antoniol, Giuliano and Gueheneuc, Yann-Gael},
      title = {An Empirical Study on the Impact of Lexical Smells on Change- and Fault-Proneness},
      journal = {Software Quality Journal},
      year = {2016},
      ee = {http://link.springer.com/article/10.1007/s11219-016-9318-6}
    }
    
      
  27. [27] W. Wu, F. Khomh, B. Adams, Y. G. Guéhéneuc, and G. Antoniol, “An exploratory study of API changes and usages based on Apache and Eclipse ecosystems,” Empirical Software Engineering, vol. 21, no. 6, pp. 2366–2412, 2016, doi: 10.1007/s10664-015-9411-7.
    Bibtex
      @article{Wei2015emse,
      author = {Wu, Wei and Khomh, Foutse and Adams, Bram and Gu{\'{e}}h{\'{e}}neuc, Yann Ga{\"{e}}l and Antoniol, Giuliano},
      title = {An exploratory study of {API} changes and usages based on Apache and Eclipse ecosystems},
      journal = {Empirical Software Engineering},
      volume = {21},
      number = {6},
      pages = {2366--2412},
      year = {2016},
      url = {http://dx.doi.org/10.1007/s10664-015-9411-7},
      doi = {10.1007/s10664-015-9411-7}
    }
    
      
    Abstract Frameworks are widely used in modern software development to reduce development costs. They are accessed through their Application Programming Interfaces (APIs), which specify the contracts with client programs. When frameworks evolve, API backward-compatibility cannot always be guaranteed and client programs must upgrade to use the new releases. Because framework upgrades are not cost-free, observing API changes and usages together at fine-grained levels is necessary to help developers understand, assess, and forecast the cost of each framework upgrade. Whereas previous work studied API changes in frameworks and API usages in client programs separately, we analyse and classify API changes and usages together in 22 framework releases from the Apache and Eclipse ecosystems and their client programs. We find that (1) missing classes and methods happen more often in frameworks and affect client programs more often than the other API change types do, (2) missing interfaces occur rarely in frameworks but affect client programs often, (3) framework APIs are used on average in 35% of client classes and interfaces, (4) most of such usages could be encapsulated locally and reduced in number, and (5) about 11% of APIs usages could cause ripple effects in client programs when these APIs change. Based on these findings, we provide suggestions for developers and researchers to reduce the impact of API evolution through language mechanisms and design strategies.
  28. [28] N. Ali, Z. Sharafi, Y. G. Guéhéneuc, and G. Antoniol, “An empirical study on the importance of source code entities for requirements traceability,” Empirical Software Engineering, vol. 20, no. 2, pp. 442–478, 2015.
    Bibtex
      @article{AliSGA15,
      author = {Ali, Nasir and Sharafi, Zohreh and Gu{\'{e}}h{\'{e}}neuc, Yann Ga{\"{e}}l and Antoniol, Giuliano},
      title = {An empirical study on the importance of source code entities for requirements traceability},
      journal = {Empirical Software Engineering},
      volume = {20},
      number = {2},
      pages = {442--478},
      year = {2015}
    }
    
      
    Abstract Requirements Traceability (RT) links help developers during program comprehension and maintenance tasks. However, creating RT links is a laborious and resource-consuming task. Information Retrieval (IR) techniques are useful to automatically create traceability links. However, IR-based techniques typically have low accuracy (precision, recall, or both) and thus, creating RT links remains a human intensive process. We conjecture that understanding how developers verify RT links could help improve the accuracy of IR-based RT techniques to create RT links. Consequently, we perform an empirical study consisting of four case studies. First, we use an eye-tracking system to capture developers’ eye movements while they verify RT links. We analyse the obtained data to identify and rank developers’ preferred types of Source Code Entities (SCEs), e.g., domain vs. implementation-level source code terms and class names vs. method names. Second, we perform another eye-tracking case study to confirm that it is the semantic content of the developers’ preferred types of SCEs and not their locations that attract developers’ attention and help them in their task to verify RT links. Third, we propose an improved term weighting scheme, i.e., Developers Preferred Term Frequency/Inverse Document Frequency (DPTF/IDF), that uses the knowledge of the developers’ preferred types of SCEs to give more importance to these SCEs into the term weighting scheme. We integrate this weighting scheme with an IR technique, i.e., Latent Semantic Indexing (LSI), to create a new technique to RT link recovery. Using three systems (iTrust, Lucene, and Pooka), we show that the proposed technique statistically improves the accuracy of the recovered RT links over a technique based on LSI and the usual Term Frequency/Inverse Document Frequency (TF/IDF) weighting scheme. Finally, we compare the newly proposed DPTF/IDF with our original Domain Or Implementation/Inverse Document Frequency (DOI/IDF) weighting scheme.
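    For reference, the standard TF/IDF baseline that DPTF/IDF modifies assigns term t in document d the weight (notation ours, not the paper’s):

      w_{t,d} = tf_{t,d} \cdot \log\!\left( \frac{N}{df_t} \right)

    where tf_{t,d} is the frequency of t in d, N is the total number of documents, and df_t is the number of documents containing t. DPTF/IDF gives additional weight to terms drawn from the developers’ preferred types of SCEs; the exact re-weighting is defined in the paper.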
  29. [29] V. Arnaoudova, M. D. Penta, and G. Antoniol, “Linguistic Antipatterns: What They are and How Developers Perceive Them,” Empirical Software Engineering (EMSE), pp. 104–158, Jan. 2015, [Online]. Available at: /wp-content/uploads/2014/10/2014-EMSE-Arnaodova-et-al-Perception-LAs.pdf.
    Bibtex
      @article{LAsPerception-15,
      title = {Linguistic Antipatterns: What They are and How Developers Perceive Them},
      author = {Arnaoudova, Venera and Penta, Massimiliano Di and Antoniol, Giuliano},
      url = {/wp-content/uploads/2014/10/2014-EMSE-Arnaodova-et-al-Perception-LAs.pdf},
      year = {2015},
      date = {2015-01-29},
      journal = {Empirical Software Engineering (EMSE)},
      pages = {104-158},
      keywords = {developers' perception, empirical study, linguistic antipatterns, source code identifiers}
    }
    
      
    Abstract Antipatterns are known as poor solutions to recurring problems. For example, Brown et al. and Fowler define practices concerning poor design or implementation solutions. However, we know that the source code lexicon is part of the factors that affect the psychological complexity of a program, i.e., factors that make a program difficult to understand and maintain by humans. The aim of this work is to identify recurring poor practices related to inconsistencies among the naming, documentation, and implementation of an entity—called Linguistic Antipatterns (LAs)—that may impair program understanding. To this end, we first mine examples of such inconsistencies in real open-source projects and abstract them into a catalog of 17 recurring LAs related to methods and attributes. Then, to understand the relevancy of LAs, we perform two empirical studies with developers—30 external (i.e., not familiar with the code) and 14 internal (i.e., people developing or maintaining the code). Results indicate that the majority of the participants perceive LAs as poor practices and therefore must be avoided—69% and 51% of the external and internal developers, respectively. As further evidence of LAs’ validity, open source developers that were made aware of LAs reacted to the issue by making code changes in 10% of the cases. Finally, in order to facilitate the use of LAs in practice, we identified a sub-set of LAs which were universally agreed upon as being problematic; those which had a clear dissonance between code behavior and lexicon.
  30. [30] S. Medini, V. Arnaoudova, M. D. Penta, G. Antoniol, Y.-G. Guéhéneuc, and P. Tonella, “SCAN: An Approach to Label and Relate Execution Trace Segments,” Journal of Software: Evolution and Process (JSEP), vol. 26, no. 11, pp. 962–995, Jan. 2014.
    Bibtex
      @article{SCAN-14,
      title = {SCAN: An Approach to Label and Relate Execution Trace Segments},
      author = {Medini, Soumaya and Arnaoudova, Venera and Penta, Massimiliano Di and Antoniol, Giuliano and Guéhéneuc, Yann-Gaël and Tonella, Paolo},
      year = {2014},
      date = {2014-01-01},
      journal = {Journal of Software: Evolution and Process (JSEP)},
      volume = {26},
      number = {11},
      pages = {962--995}
    }
    
      
    Abstract Program comprehension is a prerequisite to any maintenance and evolution task. In particular, when performing feature location, developers perform program comprehension by abstracting software features and identifying the links between high-level abstractions (features) and program elements. We present Segment Concept AssigNer (SCAN), an approach to support developers in feature location. SCAN uses a search-based approach to split execution traces into cohesive segments. Then, it labels the segments with relevant keywords and, finally, uses formal concept analysis to identify relations among segments. In a first study, we evaluate the performances of SCAN on six Java programs by 31 participants. We report an average precision of 69% and a recall of 63% when comparing the manual and automatic labels and a precision of 63% regarding the relations among segments identified by SCAN. After that, we evaluate the usefulness of SCAN for the purpose of feature location on two Java programs. We provide evidence that SCAN (i) identifies 69% of the gold set methods and (ii) is effective in reducing the quantity of information that developers must process to locate features—reducing the number of methods to understand by an average of 43% compared to the entire execution traces.
  31. [31] B. De Smet, L. Lempereur, Z. Sharafi, Y.-G. Guéhéneuc, G. Antoniol, and N. Habra, “Taupe: Visualizing and analyzing eye-tracking data,” Sci. Comput. Program., vol. 79, pp. 260–278, 2014.
    Bibtex
      @article{1s20S0167642312000135main,
      author = {Smet, Beno\^{\i}t De and Lempereur, Lorent and Sharafi, Zohreh and Gu{\'e}h{\'e}neuc, Yann-Ga{\"e}l and Antoniol, Giuliano and Habra, Naji},
      title = {Taupe: Visualizing and analyzing eye-tracking data},
      journal = {Sci. Comput. Program.},
      volume = {79},
      year = {2014},
      pages = {260-278},
      ee = {http://dx.doi.org/10.1016/j.scico.2012.01.004},
      bibsource = {DBLP, http://dblp.uni-trier.de},
      pdf = {2014/1s20S0167642312000135main.pdf}
    }
    
      
    Abstract Program comprehension is an essential part of any maintenance activity. It allows developers to build mental models of the program before undertaking any change. It has been studied by the research community for many years with the aim to devise models and tools to understand and ease this activity. Recently, researchers have introduced the use of eye-tracking devices to gather and analyze data about the developers’ cognitive processes during program comprehension. However, eye-tracking devices are not completely reliable and, thus, recorded data sometimes must be processed, filtered, or corrected. Moreover, the analysis software tools packaged with eye-tracking devices are not open-source and do not always provide extension points to seamlessly integrate new sophisticated analyses. Consequently, we develop the Taupe software system to help researchers visualize, analyze, and edit the data recorded by eye-tracking devices. The two main objectives of Taupe are compatibility and extensibility so that researchers can easily: (1) apply the system on any eye-tracking data and (2) extend the system with their own analyses. To meet our objectives, we base the development of Taupe: (1) on well-known good practices, such as design patterns and a plug-in architecture using reflection, (2) on a thorough documentation, validation, and verification process, and (3) on lessons learned from existing analysis software systems. This paper describes the context of development of Taupe, the architectural and design choices made during its development, and its documentation, validation and verification process. It also illustrates the application of Taupe in three experiments on the use of design patterns by developers during program comprehension.
  32. [32] V. Arnaoudova, L. M. Eshkevari, M. D. Penta, R. Oliveto, G. Antoniol, and Y.-G. Guéhéneuc, “REPENT: Analyzing the Nature of Identifier Renamings,” IEEE Trans. Software Eng., vol. 40, no. 5, pp. 502–532, 2014.
    Bibtex
      @article{journals/tse/ArnaoudovaEPOAG14,
      author = {Arnaoudova, Venera and Eshkevari, Laleh Mousavi and Penta, Massimiliano Di and Oliveto, Rocco and Antoniol, Giuliano and Gu{\'e}h{\'e}neuc, Yann-Ga{\"e}l},
      title = {REPENT: Analyzing the Nature of Identifier Renamings},
      journal = {IEEE Trans. Software Eng.},
      volume = {40},
      number = {5},
      year = {2014},
      pages = {502-532},
      ee = {http://doi.ieeecomputersociety.org/10.1109/TSE.2014.2312942},
      bibsource = {DBLP, http://dblp.uni-trier.de}
    }
    
      
  33. [33] F. Jaafar, Y.-G. Guéhéneuc, S. Hamel, and G. Antoniol, “Detecting asynchrony and dephase change patterns by mining software repositories,” Journal of Software: Evolution and Process, vol. 26, no. 1, pp. 77–106, 2014.
    Bibtex
      @article{journals/smr/JaafarGHA14,
      author = {Jaafar, Fehmi and Gu{\'e}h{\'e}neuc, Yann-Ga{\"e}l and Hamel, Sylvie and Antoniol, Giuliano},
      title = {Detecting asynchrony and dephase change patterns by mining software repositories},
      journal = {Journal of Software: Evolution and Process},
      volume = {26},
      number = {1},
      year = {2014},
      pages = {77-106},
      ee = {http://dx.doi.org/10.1002/smr.1635},
      bibsource = {DBLP, http://dblp.uni-trier.de}
    }
    
      
  34. [34] S. Kpodjedo, P. Galinier, and G. Antoniol, “Using local similarity measures to efficiently address approximate graph matching,” Discrete Applied Mathematics, vol. 164, pp. 161–177, 2014.
    Bibtex
      @article{journals/dam/KpodjedoGA14,
      author = {Kpodjedo, Segla and Galinier, Philippe and Antoniol, Giuliano},
      title = {Using local similarity measures to efficiently address approximate graph matching},
      journal = {Discrete Applied Mathematics},
      volume = {164},
      year = {2014},
      pages = {161-177},
      ee = {http://dx.doi.org/10.1016/j.dam.2012.01.019},
      bibsource = {DBLP, http://dblp.uni-trier.de}
    }
    
      
  35. [35] L. Guerrouj, M. D. Penta, Y. G. Guéhéneuc, and G. Antoniol, “An experimental investigation on the effects of context on source code identifiers splitting and expansion,” Empirical Software Engineering, vol. 19, no. 6, pp. 1706–1753, 2014, doi: 10.1007/s10664-013-9260-1.
    Bibtex
      @article{journals/ese/GuerroujPGA14,
      author = {Guerrouj, Latifa and Penta, Massimiliano Di and Gu{\'{e}}h{\'{e}}neuc, Yann Ga{\"{e}}l and Antoniol, Giuliano},
      title = {An experimental investigation on the effects of context on source code identifiers splitting and expansion},
      journal = {Empirical Software Engineering},
      volume = {19},
      number = {6},
      pages = {1706--1753},
      year = {2014},
      url = {http://dx.doi.org/10.1007/s10664-013-9260-1},
      doi = {10.1007/s10664-013-9260-1}
    }
    
      
    Abstract Recent and past studies indicate that source code lexicon plays an important role in program comprehension. Developers often compose source code identifiers with abbreviated words and acronyms, and do not always use consistent mechanisms and explicit separators when creating identifiers. Such choices and inconsistencies impede the work of developers that must understand identifiers by decomposing them into their component terms, and mapping them onto dictionary, application or domain words. When software documentation is scarce, outdated or simply not available, developers must therefore use the available contextual information to understand the source code. This paper aims at investigating how developers split and expand source code identifiers, and, specifically, the extent to which different kinds of contextual information could support such a task. In particular, we consider (i) an internal context consisting of the content of functions and source code files in which the identifiers are located, and (ii) an external context involving external documentation. We conducted a family of two experiments with 63 participants, including bachelor, master, Ph.D. students, and post-docs. We randomly sampled a set of 50 identifiers from a corpus of open source C programs and we asked participants to split and expand them with the availability (or not) of internal and external contexts. We report evidence on the usefulness of contextual information for identifier splitting and acronym/abbreviation expansion. We observe that the source code files are more helpful than just looking at function source code, and that the application-level contextual information does not help any further. The availability of external sources of information only helps in some circumstances. Also, in some cases, we observe that participants better expanded acronyms than abbreviations, although in most cases both exhibit the same level of accuracy. Finally, results indicated that the knowledge of English plays a significant effect in identifier splitting/expansion. The obtained results confirm the conjecture that contextual information is useful in program comprehension, including when developers split and expand identifiers to understand them. We hypothesize that the integration of identifier splitting and expansion tools with IDE could help to improve developers’ productivity.
  36. [36] W. Wu, A. Serveaux, Y. G. Guéhéneuc, and G. Antoniol, “The Impact of Imperfect Change Rules on Framework API Evolution Identification: An Empirical Study,” Empirical Software Engineering, vol. 20, pp. 1126–1158, 2014.
    Bibtex
      @article{Wei2014emse,
      author = {Wu, Wei and Serveaux, Adrien and Gu{\'{e}}h{\'{e}}neuc, Yann Ga{\"{e}}l and Antoniol, Giuliano},
      title = {The Impact of Imperfect Change Rules on Framework API Evolution Identification: An Empirical Study},
      journal = {Empirical Software Engineering},
      volume = {20},
      pages = {1126--1158},
      year = {2014}
    }
    
      
    Abstract Software frameworks keep evolving. It is often time-consuming for developers to keep their client code up-to-date. Not all frameworks have documentation about the upgrading process. Many approaches have been proposed to ease the impact of non-documented framework evolution on developers by identifying change rules between two releases of a framework, but these change rules are imperfect, i.e., not 100% correct. To the best of our knowledge, there is no empirical study to show the usefulness of these imperfect change rules. Therefore, we design and conduct an experiment to evaluate their impact. In the experiment, the subjects must find the replacements of 21 missing methods in the new releases of three open-source frameworks with the help of (1) all-correct, (2) imperfect, and (3) no change rules. The statistical analysis results show that the precision of the replacements found by the subjects with the three sets of change rules are significantly different. The precision with all-correct change rules is the highest while that with no change rules is the lowest, while imperfect change rules give a precision in between. The effect size of the difference between the subjects with no and imperfect change rules is large and that between the subjects with imperfect and correct change rules is moderate. The results of this study show that the change rules generated by framework API evolution approaches do help developers, even if they are not always correct. The imperfect change rules can be used by developers upgrading their code when documentation is not available or as a complement to partial documentation. The moderate difference between results from subjects with imperfect and all-correct change rules also suggests that improving precision of change rules will still help developers.
  37. [37] S. Kpodjedo, F. Ricca, P. Galinier, G. Antoniol, and Y.-G. Guéhéneuc, “MADMatch: Many-to-Many Approximate Diagram Matching for Design Comparison,” IEEE Trans. Software Eng., vol. 39, no. 8, pp. 1090–1111, 2013.
    Bibtex
      @article{06464271,
      author = {Kpodjedo, Segla and Ricca, Filippo and Galinier, Philippe and Antoniol, Giuliano and Gu{\'e}h{\'e}neuc, Yann-Ga{\"e}l},
      title = {MADMatch: Many-to-Many Approximate Diagram Matching for Design Comparison},
      journal = {IEEE Trans. Software Eng.},
      volume = {39},
      number = {8},
      year = {2013},
      pages = {1090-1111},
      ee = {http://doi.ieeecomputersociety.org/10.1109/TSE.2013.9},
      bibsource = {DBLP, http://dblp.uni-trier.de},
      pdf = {2013/06464271.pdf}
    }
    
      
    Abstract Matching algorithms play a fundamental role in many important but difficult software engineering activities, especially design evolution analysis and model comparison. We present MADMatch, a fast and scalable Many-to-many Approximate Diagram Matching approach based on an Error-Tolerant Graph matching (ETGM) formulation. Diagrams are represented as graphs, costs are assigned to possible differences between two given graphs, and the goal is to retrieve the cheapest matching. We address the resulting optimisation problem with a tabu search enhanced by the novel use of lexical and structural information. Through several case studies with different types of diagrams and tasks, we show that our generic approach obtains better results than dedicated state-of-the-art algorithms, such as AURA, PLTSDiff or UMLDiff, on the exact same datasets used to introduce (and evaluate) these algorithms.
  38. [38] N. Ali, Y.-G. Guéhéneuc, and G. Antoniol, “Trustrace: Mining Software Repositories to Improve the Accuracy of Requirement Traceability Links,” IEEE Trans. Software Eng., vol. 39, no. 5, pp. 725–741, 2013.
    Bibtex
      @article{06341764,
      author = {Ali, Nasir and Gu{\'e}h{\'e}neuc, Yann-Ga{\"e}l and Antoniol, Giuliano},
      title = {Trustrace: Mining Software Repositories to Improve the Accuracy of Requirement Traceability Links},
      journal = {IEEE Trans. Software Eng.},
      volume = {39},
      number = {5},
      year = {2013},
      pages = {725-741},
      ee = {http://doi.ieeecomputersociety.org/10.1109/TSE.2012.71},
      bibsource = {DBLP, http://dblp.uni-trier.de},
      pdf = {2013/06341764.pdf}
    }
    
      
    Abstract Traceability is the only means to ensure that the source code of a system is consistent with its requirements and that all and only the specified requirements have been implemented by developers. During software maintenance and evolution, requirement traceability links become obsolete because developers do not/cannot devote effort to update them. Yet, recovering these traceability links later is a daunting and costly task for developers. Consequently, the literature proposed methods, techniques, and tools to recover these traceability links semi-automatically or automatically. Among the proposed techniques, the literature showed that information retrieval (IR) techniques can automatically recover traceability links between free-text requirements and source code. However, IR techniques lack accuracy (precision and recall). In this paper, we show that mining software repositories and combining mined results with IR techniques can improve the accuracy (precision and recall) of IR techniques and we propose Trustrace, a trust-based traceability recovery approach. We apply Trustrace on four medium-size open-source systems to compare the accuracy of its traceability links with those recovered using state-of-the-art IR techniques from the literature, based on the Vector Space Model and Jensen–Shannon model. The results of Trustrace are up to 22.7% more precise and have 7.66% better recall values than those of the other techniques, on average. We thus show that mining software repositories and combining the mined data with existing results from IR techniques improves the precision and recall of requirement traceability links.
  39. [39] L. Guerrouj, M. D. Penta, G. Antoniol, and Y.-G. Guéhéneuc, “TIDIER: an identifier splitting approach using speech recognition techniques,” Journal of Software: Evolution and Process, vol. 25, no. 6, pp. 575–599, 2013.
    Bibtex
      @article{journals/smr/GuerroujPAG13,
      author = {Guerrouj, Latifa and Penta, Massimiliano Di and Antoniol, Giuliano and Gu{\'e}h{\'e}neuc, Yann-Ga{\"e}l},
      title = {TIDIER: an identifier splitting approach using speech recognition techniques},
      journal = {Journal of Software: Evolution and Process},
      volume = {25},
      number = {6},
      year = {2013},
      pages = {575-599},
      ee = {http://dx.doi.org/10.1002/smr.539},
      bibsource = {DBLP, http://dblp.uni-trier.de}
    }
    
      
    Abstract The software engineering literature reports empirical evidence on the relation between various characteristics of a software system and its quality. Among other factors, recent studies have shown that a proper choice of identifiers influences understandability and maintainability. Indeed, identifiers are developers’ main source of information and guide their cognitive processes during program comprehension when high-level documentation is scarce or outdated and when source code is not sufficiently commented. This paper proposes a novel approach to recognize words composing source code identifiers. The approach is based on an adaptation of Dynamic Time Warping used to recognize words in continuous speech. The approach overcomes the limitations of existing identifier-splitting approaches when naming conventions (e.g. Camel Case) are not used or when identifiers contain abbreviations. We apply the approach on a sample of more than 1,000 identifiers extracted from 340 C programs and compare its results with a simple Camel Case splitter and with an implementation of an alternative identifier splitting approach, Samurai. Results indicate the capability of the novel approach: (i) to outperform the alternative ones, when using a dictionary augmented with domain knowledge or a contextual dictionary and (ii) to expand 48% of a set of selected abbreviations into dictionary words.
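    The core of TIDIER is an adaptation of Dynamic Time Warping (DTW). As a rough, hypothetical illustration only, a textbook DTW distance between an identifier fragment and a dictionary word can be computed as below; the paper’s actual algorithm adapts the cost model for abbreviations and word boundaries.
      // Textbook DTW over two strings with unit substitution cost; a sketch of
      // the kind of distance TIDIER builds on, not the paper's algorithm.
      class Dtw {
          static int distance(String a, String b) {
              int n = a.length(), m = b.length();
              int[][] d = new int[n + 1][m + 1]; // d[0][0] == 0 by default
              for (int i = 1; i <= n; i++) d[i][0] = Integer.MAX_VALUE / 2;
              for (int j = 1; j <= m; j++) d[0][j] = Integer.MAX_VALUE / 2;
              for (int i = 1; i <= n; i++) {
                  for (int j = 1; j <= m; j++) {
                      int cost = a.charAt(i - 1) == b.charAt(j - 1) ? 0 : 1;
                      d[i][j] = cost + Math.min(d[i - 1][j - 1],   // align both
                                       Math.min(d[i - 1][j],      // stretch b
                                                d[i][j - 1]));    // stretch a
                  }
              }
              return d[n][m]; // 0 when identical; grows as the strings diverge
          }

          public static void main(String[] args) {
              // prints the DTW distance between "cntr" and "counter"
              System.out.println(distance("cntr", "counter"));
          }
      }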
  40. [40] G. Antoniol and K. B. Gallagher, “Preface to the special issue on program comprehension,” Empirical Software Engineering, vol. 18, no. 2, pp. 177–180, 2013.
    Bibtex
      @article{journals/ese/AntoniolG13,
      author = {Antoniol, Giuliano and Gallagher, Keith B.},
      title = {Preface to the special issue on program comprehension},
      journal = {Empirical Software Engineering},
      volume = {18},
      number = {2},
      year = {2013},
      pages = {177-180},
      ee = {http://dx.doi.org/10.1007/s10664-012-9212-1},
      bibsource = {DBLP, http://dblp.uni-trier.de}
    }
    
      
  41. [41] S. Kpodjedo, F. Ricca, P. Galinier, G. Antoniol, and Y.-G. Guéhéneuc, “Studying software evolution of large object-oriented software systems using an ETGM algorithm,” Journal of Software: Evolution and Process, vol. 25, no. 2, pp. 139–163, 2013.
    Bibtex
      @article{journals/smr/KpodjedoRGAG13,
      author = {Kpodjedo, Segla and Ricca, Filippo and Galinier, Philippe and Antoniol, Giuliano and Gu{\'e}h{\'e}neuc, Yann-Ga{\"e}l},
      title = {Studying software evolution of large object-oriented software systems using an ETGM algorithm},
      journal = {Journal of Software: Evolution and Process},
      volume = {25},
      number = {2},
      year = {2013},
      pages = {139-163},
      ee = {http://dx.doi.org/10.1002/smr.519},
      bibsource = {DBLP, http://dblp.uni-trier.de}
    }
    
      
  43. [43]M. Pinzger and G. Antoniol, “Guest editorial: reverse engineering,” Empirical Software Engineering, vol. 18, no. 5, pp. 857–858, 2013.
    Bibtex
      @article{journals/ese/PinzgerA13,
      author = {Pinzger, Martin and Antoniol, Giuliano},
      title = {Guest editorial: reverse engineering},
      journal = {Empirical Software Engineering},
      volume = {18},
      number = {5},
      year = {2013},
      pages = {857-858},
      ee = {http://dx.doi.org/10.1007/s10664-012-9237-5},
      bibsource = {DBLP, http://dblp.uni-trier.de}
    }
    
      
  44. [44]F. Khomh, M. D. Penta, Y.-G. Guéhéneuc, and G. Antoniol, “An exploratory study of the impact of antipatterns on class change- and fault-proneness,” Empirical Software Engineering, vol. 17, no. 3, pp. 243–275, 2012.
    Bibtex
      @article{art3A1010072Fs106640119171y,
      author = {Khomh, Foutse and Penta, Massimiliano Di and Gu{\'e}h{\'e}neuc, Yann-Ga{\"e}l and Antoniol, Giuliano},
      title = {An exploratory study of the impact of antipatterns on class change- and fault-proneness},
      journal = {Empirical Software Engineering},
      volume = {17},
      number = {3},
      year = {2012},
      pages = {243-275},
      ee = {http://dx.doi.org/10.1007/s10664-011-9171-y},
      bibsource = {DBLP, http://dblp.uni-trier.de},
      pdf = {2012/art3A1010072Fs106640119171y.pdf}
    }
    
      
    Abstract Antipatterns are poor design choices that are conjectured to make object-oriented systems harder to maintain. We investigate the impact of antipatterns on classes in object-oriented systems by studying the relation between the presence of antipatterns and the change- and fault-proneness of the classes. We detect 13 antipatterns in 54 releases of ArgoUML, Eclipse, Mylyn, and Rhino, and analyse (1) to what extent classes participating in antipatterns have higher odds to change or to be subject to fault-fixing than other classes, (2) to what extent these odds (if higher) are due to the sizes of the classes or to the presence of antipatterns, and (3) what kinds of changes affect classes participating in antipatterns. We show that, in almost all releases of the four systems, classes participating in antipatterns are more change- and fault-prone than others. We also show that size alone cannot explain the higher odds of classes with antipatterns to undergo a (fault-fixing) change than other classes. Finally, we show that structural changes affect more classes with antipatterns than other classes. We provide qualitative explanations of the increase of change- and fault-proneness in classes participating in antipatterns using release notes and bug reports. The obtained results justify a posteriori previous work on the specification and detection of antipatterns and could help to better focus quality assurance and testing activities.
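    The odds comparison at the heart of such a study reduces to a 2x2 contingency table per release. A toy computation, with invented counts:

      # Odds ratio relating antipattern participation to change-proneness.
      changed_ap, stable_ap = 40, 10          # classes with antipatterns
      changed_no, stable_no = 60, 90          # classes without antipatterns

      odds_ratio = (changed_ap / stable_ap) / (changed_no / stable_no)
      print(f"odds ratio = {odds_ratio:.1f}")  # > 1: higher odds to change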
  45. [45]A. Zaidman and G. Antoniol, “Preface to the special issue on reverse engineering (featuring selected papers from WCRE 2009),” Sci. Comput. Program., vol. 77, no. 6, pp. 741–742, 2012.
    Bibtex
      @article{journals/scp/ZaidmanA12,
      author = {Zaidman, Andy and Antoniol, Giuliano},
      title = {Preface to the special issue on reverse engineering (featuring selected papers from WCRE 2009)},
      journal = {Sci. Comput. Program.},
      volume = {77},
      number = {6},
      year = {2012},
      pages = {741-742},
      ee = {http://dx.doi.org/10.1016/j.scico.2011.01.008},
      bibsource = {DBLP, http://dblp.uni-trier.de}
    }
    
      
  46. [46]M. D. Penta, M. Harman, and G. Antoniol, “The use of search-based optimization techniques to schedule and staff software projects: an approach and an empirical study,” Softw., Pract. Exper., vol. 41, no. 5, pp. 495–519, 2011.
    Bibtex
      @article{1001_ftp,
      author = {Penta, Massimiliano Di and Harman, Mark and Antoniol, Giuliano},
      title = {The use of search-based optimization techniques to schedule and staff software projects: an approach and an empirical study},
      journal = {Softw., Pract. Exper.},
      volume = {41},
      number = {5},
      year = {2011},
      pages = {495-519},
      ee = {http://dx.doi.org/10.1002/spe.1001},
      bibsource = {DBLP, http://dblp.uni-trier.de},
      pdf = {2011/1001_ftp.pdf}
    }
    
      
    Abstract Allocating resources to a software project and assigning tasks to teams constitute crucial activities that affect project cost and completion time. Finding a solution for such a problem is NP-hard; this requires managers to be supported by proper tools in performing such an allocation. This paper shows how search-based optimization techniques can be combined with a queuing simulation model to address these problems. The obtained staff and task allocations aim to minimize the completion time and reduce schedule fragmentation. The proposed approach allows project managers to run multiple simulations, compare results and consider trade-offs between increasing the staffing level and anticipating the project completion date, and between reducing the fragmentation and accepting project delays. The paper presents results from the application of the proposed search-based project planning approach to data obtained from two large-scale commercial software maintenance projects.
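    A stripped-down flavor of such search-based allocation, using plain hill climbing instead of the paper's combination of search and queuing simulation; durations and team count are invented.

      # Hill-climb a task-to-team assignment to minimize completion time
      # (makespan). A stand-in for a richer simulation-based fitness.
      import random

      durations = [3, 7, 2, 5, 4, 6, 1, 8]      # effort per work package
      teams = 3

      def makespan(assign):
          load = [0] * teams
          for task, team in enumerate(assign):
              load[team] += durations[task]
          return max(load)

      random.seed(0)
      assign = [random.randrange(teams) for _ in durations]
      best = makespan(assign)
      for _ in range(1000):
          task = random.randrange(len(durations))
          old = assign[task]
          assign[task] = random.randrange(teams)
          if makespan(assign) <= best:
              best = makespan(assign)
          else:
              assign[task] = old                # revert a worsening move
      print(assign, best)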
  47. [47]S. Bouktif, F. Ahmed, I. Khalil, and G. Antoniol, “Corrigendum to ‘A novel composite model approach to improve software quality prediction’ [Information and Software Technology 52 (12) (2010) 1298-1311],” Information & Software Technology, vol. 53, no. 3, p. 291, 2011.
    Bibtex
      @article{journals/infsof/BouktifAKA11,
      author = {Bouktif, Salah and Ahmed, Faheem and Khalil, Issa and Antoniol, Giuliano},
      title = {Corrigendum to "A novel composite model approach to improve software quality prediction" [Information and Software Technology 52 (12) (2010) 1298-1311]},
      journal = {Information {\&} Software Technology},
      volume = {53},
      number = {3},
      year = {2011},
      pages = {291},
      ee = {http://dx.doi.org/10.1016/j.infsof.2010.12.004},
      bibsource = {DBLP, http://dblp.uni-trier.de}
    }
    
      
  48. [48]S. Kpodjedo, F. Ricca, P. Galinier, Y.-G. Guéhéneuc, and G. Antoniol, “Design evolution metrics for defect prediction in object oriented systems,” Empirical Software Engineering, vol. 16, no. 1, pp. 141–175, 2011.
    Bibtex
      @article{art3A1010072Fs1066401091517,
      author = {Kpodjedo, Segla and Ricca, Filippo and Galinier, Philippe and Gu{\'e}h{\'e}neuc, Yann-Ga{\"e}l and Antoniol, Giuliano},
      title = {Design evolution metrics for defect prediction in object oriented systems},
      journal = {Empirical Software Engineering},
      volume = {16},
      number = {1},
      year = {2011},
      pages = {141-175},
      ee = {http://dx.doi.org/10.1007/s10664-010-9151-7},
      bibsource = {DBLP, http://dblp.uni-trier.de},
      pdf = {2011/art3A1010072Fs1066401091517.pdf}
    }
    
      
    Abstract Testing is the most widely adopted practice to ensure software quality. However, this activity is often a compromise between the available resources and software quality. In object-oriented development, testing effort should be focused on defective classes. Unfortunately, identifying those classes is a challenging and difficult activity on which many metrics, techniques, and models have been tried. In this paper, we investigate the usefulness of elementary design evolution metrics to identify defective classes. The metrics include the numbers of added, deleted, and modified attributes, methods, and relations. The metrics are used to recommend a ranked list of classes likely to contain defects for a system. They are compared to Chidamber and Kemerer’s metrics on several versions of Rhino and of ArgoUML. Further comparison is conducted with the complexity metrics computed by Zimmermann et al. on several releases of Eclipse. The comparisons are made according to three criteria: presence of defects, number of defects, and defect density in the top-ranked classes. They show that the design evolution metrics, when used in conjunction with known metrics, improve the identification of defective classes. In addition, they show that the design evolution metrics make significantly better predictions of defect density than other metrics and, thus, can help in reducing the testing effort by focusing test activity on a reduced volume of code.
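    The ranking step the abstract describes is simple enough to sketch; class names and counts below are invented.

      # Rank classes by elementary design evolution metrics: numbers of
      # added, deleted, and modified attributes/methods/relations.
      metrics = {
          "ParserCore":  {"added": 5, "deleted": 2, "modified": 9},
          "AstVisitor":  {"added": 1, "deleted": 0, "modified": 2},
          "SymbolTable": {"added": 3, "deleted": 4, "modified": 6},
      }
      for cls in sorted(metrics, key=lambda c: -sum(metrics[c].values())):
          print(sum(metrics[cls].values()), cls)  # inspect/test top first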
  49. [49]S. Bouktif, F. Ahmed, I. Khalil, and G. Antoniol, “A novel composite model approach to improve software quality prediction,” Information & Software Technology, vol. 52, no. 12, pp. 1298–1311, 2010.
    Bibtex
      @article{journals/infsof/BouktifAKA10,
      author = {Bouktif, Salah and Ahmed, Faheem and Khalil, Issa and Antoniol, Giuliano},
      title = {A novel composite model approach to improve software quality prediction},
      journal = {Information {\&} Software Technology},
      volume = {52},
      number = {12},
      year = {2010},
      pages = {1298-1311},
      ee = {http://dx.doi.org/10.1016/j.infsof.2010.07.003},
      bibsource = {DBLP, http://dblp.uni-trier.de}
    }
    
      
  50. [50]S. Kpodjedo, P. Galinier, and G. Antoniol, “On the use of similarity metrics for approximate graph matching,” Electronic Notes in Discrete Mathematics, vol. 36, pp. 687–694, 2010.
    Bibtex
      @article{journals/endm/KpodjedoGA10,
      author = {Kpodjedo, Segla and Galinier, Philippe and Antoniol, Giuliano},
      title = {On the use of similarity metrics for approximate graph matching},
      journal = {Electronic Notes in Discrete Mathematics},
      volume = {36},
      year = {2010},
      pages = {687-694},
      ee = {http://dx.doi.org/10.1016/j.endm.2010.05.087},
      bibsource = {DBLP, http://dblp.uni-trier.de}
    }
    
      
  51. [51]J. K.-Y. Ng, Y.-G. Guéhéneuc, and G. Antoniol, “Identification of behavioural and creational design motifs through dynamic analysis,” Journal of Software Maintenance, vol. 22, no. 8, pp. 597–627, 2010.
    Bibtex
      @article{journals/smr/NgGA10,
      author = {Ng, Janice Ka-Yee and Gu{\'e}h{\'e}neuc, Yann-Ga{\"e}l and Antoniol, Giuliano},
      title = {Identification of behavioural and creational design motifs through dynamic analysis},
      journal = {Journal of Software Maintenance},
      volume = {22},
      number = {8},
      year = {2010},
      pages = {597-627},
      ee = {http://dx.doi.org/10.1002/smr.421},
      bibsource = {DBLP, http://dblp.uni-trier.de}
    }
    
      
    Abstract Design patterns offer design motifs, solutions to object-oriented design problems. Design motifs lead to well-structured designs and thus are believed to ease software maintenance. However, after use, they are often ‘lost’ and are consequently of little help during program comprehension and other maintenance activities. Therefore, several works proposed design pattern identification approaches to recover occurrences of the motifs. These approaches mainly used the structure and organization of classes as input. Consequently, they have a low precision when considering behavioural and creational motifs, which pertain to the assignment of responsibilities and the collaborations among objects at runtime. We propose MoDeC, an approach to describe behavioural and creational motifs as collaborations among objects in the form of scenario diagrams. We identify these motifs using dynamic analysis and constraint programming. Using a proof-of-concept implementation of MoDeC and different execution scenarios on five Java programs, for the Builder, Command, and Visitor motifs, we show that MoDeC has a better precision than state-of-the-art static approaches.
  52. [52]M. D. Penta, G. Antoniol, and M. Harman, “Special Issue on Search-Based Software Maintenance,” Journal of Software Maintenance, vol. 20, no. 5, pp. 317–319, 2008.
    Bibtex
      @article{journals/smr/PentaAH08,
      author = {Penta, Massimiliano Di and Antoniol, Giuliano and Harman, Mark},
      title = {Special Issue on Search-Based Software Maintenance},
      journal = {Journal of Software Maintenance},
      volume = {20},
      number = {5},
      year = {2008},
      pages = {317-319},
      ee = {http://dx.doi.org/10.1002/smr.395},
      bibsource = {DBLP, http://dblp.uni-trier.de}
    }
    
      
  53. [53]Y.-G. Guéhéneuc and G. Antoniol, “DeMIMA: A Multilayered Approach for Design Pattern Identification,” IEEE Trans. Software Eng., vol. 34, no. 5, pp. 667–684, 2008.
    Bibtex
      @article{journals/tse/GueheneucA08,
      author = {Gu{\'e}h{\'e}neuc, Yann-Ga{\"e}l and Antoniol, Giuliano},
      title = {DeMIMA: A Multilayered Approach for Design Pattern Identification},
      journal = {IEEE Trans. Software Eng.},
      volume = {34},
      number = {5},
      year = {2008},
      pages = {667-684},
      ee = {http://doi.ieeecomputersociety.org/10.1109/TSE.2008.48},
      bibsource = {DBLP, http://dblp.uni-trier.de}
    }
    
      
    Abstract Design patterns are important in object-oriented programming because they offer design motifs, elegant solutions to recurrent design problems, which improve the quality of software systems. Design motifs facilitate system maintenance by helping to understand design and implementation. However, after implementation, design motifs are spread throughout the source code and are thus not directly available to maintainers. We present DeMIMA, an approach to identify semi-automatically micro-architectures that are similar to design motifs in source code and to ensure the traceability of these micro-architectures between implementation and design. DeMIMA consists of three layers: two layers to recover an abstract model of the source code, including binary class relationships, and a third layer to identify design patterns in the abstract model. We apply DeMIMA to five open-source systems and, on average, we observe 34% precision for the considered 12 design motifs. Through the use of explanation-based constraint programming, DeMIMA ensures 100% recall on the five systems. We also apply DeMIMA on 33 industrial components.
  54. [54]C. D. Grosso, G. Antoniol, E. Merlo, and P. Galinier, “Detecting buffer overflow via automatic test input data generation,” Computers & OR, vol. 35, no. 10, pp. 3125–3143, 2008.
    Bibtex
      @article{journals/cor/GrossoAMG08,
      author = {Grosso, Concettina Del and Antoniol, Giuliano and Merlo, Ettore and Galinier, Philippe},
      title = {Detecting buffer overflow via automatic test input data generation},
      journal = {Computers {\&} OR},
      volume = {35},
      number = {10},
      year = {2008},
      pages = {3125-3143},
      ee = {http://dx.doi.org/10.1016/j.cor.2007.01.013},
      bibsource = {DBLP, http://dblp.uni-trier.de}
    }
    
      
    Abstract Buffer overflows cause serious problems in various categories of software systems. In critical systems, such as health-care, nuclear or aerospace software applications, a buffer overflow may cause severe threats to humans or severe economic losses. If they occur in network or security applications, they can be exploited to gain administrator privileges, perform system attacks, access unauthorized data, or misuse the system. This paper proposes a combination of genetic algorithms, linear programming, evolutionary testing, and static and dynamic information to detect buffer overflows. The newly proposed test input generation process avoids the need for human intervention to define and tune genetic algorithm weights and therefore becomes completely automated. The process that guides the genetic search towards the detection of buffer overflows relies on a fitness function that takes into account static and dynamic information. Reported results of our case studies, consisting of two sets of open-source programs, show that the new process and fitness function outperform previously published approaches.
  55. [55]D. Poshyvanyk, Y.-G. Guéhéneuc, A. Marcus, G. Antoniol, and V. Rajlich, “Feature Location Using Probabilistic Ranking of Methods Based on Execution Scenarios and Information Retrieval,” IEEE Trans. Software Eng., vol. 33, no. 6, pp. 420–432, 2007.
    Bibtex
      @article{journals/tse/PoshyvanykGMAR07,
      author = {Poshyvanyk, Denys and Gu{\'e}h{\'e}neuc, Yann-Ga{\"e}l and Marcus, Andrian and Antoniol, Giuliano and Rajlich, V{\'a}clav},
      title = {Feature Location Using Probabilistic Ranking of Methods Based on Execution Scenarios and Information Retrieval},
      journal = {IEEE Trans. Software Eng.},
      volume = {33},
      number = {6},
      year = {2007},
      pages = {420-432},
      ee = {http://doi.ieeecomputersociety.org/10.1109/TSE.2007.1016},
      bibsource = {DBLP, http://dblp.uni-trier.de}
    }
    
      
    Abstract This paper recasts the problem of feature location in source code as a decision-making problem in the presence of uncertainty. The solution to the problem is formulated as a combination of the opinions of different experts. The experts in this work are two existing techniques for feature location: a scenario-based probabilistic ranking of events and an information-retrieval-based technique that uses Latent Semantic Indexing. The combination of these two experts is empirically evaluated through several case studies, which use the source code of the Mozilla Web browser and the Eclipse integrated development environment. The results show that the combination of experts significantly improves the effectiveness of feature location as compared to each of the experts used independently.
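    One simple way to combine two such experts is a weighted (affine) combination of their normalized scores per method; the sketch below uses invented scores and an arbitrary weight.

      # Merge an IR-based expert and a dynamic (scenario-based) expert.
      ir_score  = {"saveBookmark": 0.9, "loadPage": 0.4, "renderView": 0.1}
      dyn_score = {"saveBookmark": 0.7, "loadPage": 0.6, "renderView": 0.0}

      lam = 0.5                                  # expert weight in [0, 1]
      combined = {m: lam * ir_score[m] + (1 - lam) * dyn_score[m]
                  for m in ir_score}
      for method, s in sorted(combined.items(), key=lambda p: -p[1]):
          print(f"{s:.2f}  {method}")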
  56. [56]S. Bellon, R. Koschke, G. Antoniol, J. Krinke, and E. Merlo, “Comparison and Evaluation of Clone Detection Tools,” IEEE Trans. Software Eng., vol. 33, no. 9, pp. 577–591, 2007.
    Bibtex
      @article{04288192,
      author = {Bellon, Stefan and Koschke, Rainer and Antoniol, Giuliano and Krinke, Jens and Merlo, Ettore},
      title = {Comparison and Evaluation of Clone Detection Tools},
      journal = {IEEE Trans. Software Eng.},
      volume = {33},
      number = {9},
      year = {2007},
      pages = {577-591},
      ee = {http://doi.ieeecomputersociety.org/10.1109/TSE.2007.70725},
      bibsource = {DBLP, http://dblp.uni-trier.de},
      pdf = {2007/04288192.pdf}
    }
    
      
    Abstract Many techniques for detecting duplicated source code (software clones) have been proposed in the past. However, it is not yet clear how these techniques compare in terms of recall and precision as well as space and time requirements. This paper presents an experiment that evaluates six clone detectors based on eight large C and Java programs (altogether almost 850 KLOC). Their clone candidates were evaluated by one of the authors as an independent third party. The selected techniques cover the whole spectrum of the state-of-the-art in clone detection. The techniques work on text, lexical and syntactic information, software metrics, and program dependency graphs.
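    Benchmarks of this kind boil down to comparing candidate clone pairs against a validated reference set; a toy version of the bookkeeping, with invented pairs:

      # Precision and recall of a clone detector over clone-pair candidates.
      reference  = {("a.c:10-40", "b.c:12-42"), ("x.c:5-25", "y.c:7-27")}
      candidates = {("a.c:10-40", "b.c:12-42"), ("a.c:50-60", "z.c:1-11")}

      tp = len(reference & candidates)           # validated true positives
      print(f"precision={tp / len(candidates):.2f} recall={tp / len(reference):.2f}")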
  57. [57]G. Antoniol and M. Ceccarelli, “Microarray image gridding with stochastic search based approaches,” Image Vision Comput., vol. 25, no. 2, pp. 155–163, 2007.
    Bibtex
      @article{1-s2.0-S0262885606000710-main,
      author = {Antoniol, Giuliano and Ceccarelli, Michele},
      title = {Microarray image gridding with stochastic search based approaches},
      journal = {Image Vision Comput.},
      volume = {25},
      number = {2},
      year = {2007},
      pages = {155-163},
      ee = {http://dx.doi.org/10.1016/j.imavis.2006.01.023},
      bibsource = {DBLP, http://dblp.uni-trier.de},
      pdf = {2007/1-s2.0-S0262885606000710-main.pdf}
    }
    
      
    Abstract The paper reports a novel approach for the problem of automatic gridding in microarray images. This problem often requires human intervention; therefore, the development of automated procedures is a fundamental issue for large-scale functional genomic experiments involving many microarray images. Our method uses a two-step process. First, a regular rectangular grid is superimposed on the image by interpolating a set of guide spots; this is done by solving a non-linear optimization problem with a stochastic search producing the best interpolating grid, parametrised by a vector of six values. Second, the interpolating grid is adapted, with a Markov Chain Monte Carlo method, to local deformations. This is done by modeling the solution as a Markov Random Field with a Gibbs prior possibly containing first-order cliques (1-cliques). The algorithm is completely automatic and no human intervention is required; it efficiently accounts for arbitrary grid rotations, irregularities and various spot sizes.
  58. [58]G. Antoniol, J. Krinke, and P. Tonella, “Special issue on Source code analysis and manipulation,” Sci. Comput. Program., vol. 62, no. 3, pp. 205–208, 2006.
    Bibtex
      @article{journals/scp/AntoniolKT06,
      author = {Antoniol, Giuliano and Krinke, Jens and Tonella, Paolo},
      title = {Special issue on Source code analysis and manipulation},
      journal = {Sci. Comput. Program.},
      volume = {62},
      number = {3},
      year = {2006},
      pages = {205-208},
      ee = {http://dx.doi.org/10.1016/j.scico.2006.04.009},
      bibsource = {DBLP, http://dblp.uni-trier.de}
    }
    
      
  59. [59]G. Antoniol and Y.-G. Guéhéneuc, “Feature Identification: An Epidemiological Metaphor,” IEEE Trans. Software Eng., vol. 32, no. 9, pp. 627–641, 2006.
    Bibtex
      @article{01707664,
      author = {Antoniol, Giuliano and Gu{\'e}h{\'e}neuc, Yann-Ga{\"e}l},
      title = {Feature Identification: An Epidemiological Metaphor},
      journal = {IEEE Trans. Software Eng.},
      volume = {32},
      number = {9},
      year = {2006},
      pages = {627-641},
      ee = {http://doi.ieeecomputersociety.org/10.1109/TSE.2006.88},
      bibsource = {DBLP, http://dblp.uni-trier.de},
      pdf = {2006/01707664.pdf}
    }
    
      
    Abstract Feature identification is a technique to identify the source code constructs activated when exercising one of the features of a program. We propose new statistical analyses of static and dynamic data to accurately identify features in large multithreaded object-oriented programs. We draw inspiration from epidemiology to improve previous approaches to feature identification and develop an epidemiological metaphor. We build our metaphor on our previous approach to feature identification, in which we use processor emulation, knowledge-based filtering, probabilistic ranking, and metamodeling. We carry out three case studies to assess the usefulness of our metaphor, using the "save a bookmark" feature of Web browsers as an illustration. In the first case study, we compare our approach with three previous approaches (a naive approach, a concept analysis-based approach, and our previous probabilistic approach) in identifying the feature in MOZILLA, a large, real-life, multithreaded object-oriented program. In the second case study, we compare the implementation of the feature in the FIREFOX and MOZILLA Web browsers. In the third case study, we identify the same feature in two more Web browsers, Chimera (in C) and ICEBrowser (in Java), and another feature in JHOTDRAW and XFIG, to highlight the generalizability of our metaphor.
  60. [60]M. Ceccarelli and G. Antoniol, “A Deformable Grid-Matching Approach for Microarray Images,” IEEE Transactions on Image Processing, vol. 15, no. 10, pp. 3178–3188, 2006.
    Bibtex
      @article{01703603,
      author = {Ceccarelli, Michele and Antoniol, Giuliano},
      title = {A Deformable Grid-Matching Approach for Microarray Images},
      journal = {IEEE Transactions on Image Processing},
      volume = {15},
      number = {10},
      year = {2006},
      pages = {3178-3188},
      ee = {http://dx.doi.org/10.1109/TIP.2006.877488},
      bibsource = {DBLP, http://dblp.uni-trier.de},
      pdf = {2006/01703603.pdf}
    }
    
      
    Abstract A fundamental step of microarray image analysis is the detection of the grid structure for the accurate location of each spot, representing the state of a given gene in a particular experimental condition. This step is known as gridding and belongs to the class of deformable grid matching problems, which are well known in the literature. Most of the available microarray gridding approaches require human intervention; for example, to specify landmarks, some points in the spot grid, or even to precisely locate individual spots. Automating this part of the process can allow high-throughput analysis. This paper focuses on the development of a fully automated procedure for the problem of automatic microarray gridding. It is grounded on the Bayesian paradigm and on image analysis techniques. The procedure has two main steps. The first step, based on the Radon transform, is aimed at generating a grid hypothesis; the second step accounts for local grid deformations. The accuracy and properties of the procedure are quantitatively assessed over a set of synthetic and real images; the results are compared with well-known methods available from the literature.
  61. [61]G. Antoniol, V. F. Rollo, and G. Venturi, “Linear predictive coding and cepstrum coefficients for mining time variant information from software repositories,” ACM SIGSOFT Software Engineering Notes, vol. 30, no. 4, pp. 1–5, 2005.
    Bibtex
      @article{p14-antoniol,
      author = {Antoniol, Giuliano and Rollo, Vincenzo Fabio and Venturi, Gabriele},
      title = {Linear predictive coding and cepstrum coefficients for mining time variant information from software repositories},
      journal = {ACM SIGSOFT Software Engineering Notes},
      volume = {30},
      number = {4},
      year = {2005},
      pages = {1-5},
      ee = {http://doi.acm.org/10.1145/1082983.1083156},
      bibsource = {DBLP, http://dblp.uni-trier.de},
      pdf = {2005/p14-antoniol.pdf}
    }
    
      
    Abstract This paper presents an approach to recover time variant information from software repositories. It is widely accepted that software evolves due to factors such as defect removal, market opportunity or adding new features. Software evolution details are stored in software repositories which often contain the changes history. On the other hand, there is a lack of approaches, technologies and methods to efficiently extract and represent time dependent information. Disciplines such as signal and image processing or speech recognition adopt frequency domain representations to mitigate differences of signals evolving in time. Inspired by time-frequency duality, this paper proposes the use of Linear Predictive Coding (LPC) and Cepstrum coefficients to model time varying software artifact histories. LPC or Cepstrum allow obtaining very compact representations with linear complexity. These representations can be used to highlight components and artifacts that evolved in the same way or with very similar evolution patterns. To assess the proposed approach we applied LPC and Cepstral analysis to 211 Linux kernel releases (i.e., from 1.0 to 1.3.100), to identify files with very similar size histories. The approach, the preliminary results and the lessons learned are presented in this paper.
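    For readers unfamiliar with LPC, a minimal sketch of coefficient extraction (autocorrelation method with the Levinson-Durbin recursion) applied to invented file-size histories; this is illustrative, not the paper's implementation.

      # LPC coefficients of a time series; nearby coefficient vectors
      # suggest artifacts with similar evolution patterns.
      import numpy as np

      def lpc(signal, order):
          x = np.asarray(signal, dtype=float)
          x = x - x.mean()
          r = np.correlate(x, x, mode="full")[len(x) - 1:len(x) + order]
          a = np.zeros(order + 1)
          a[0] = 1.0
          err = r[0]
          for i in range(1, order + 1):          # Levinson-Durbin recursion
              k = -(r[i] + np.dot(a[1:i], r[i - 1:0:-1])) / err
              a[1:i] = a[1:i] + k * a[i - 1:0:-1]
              a[i] = k
              err *= 1.0 - k * k
          return a[1:]

      h1 = [10, 12, 15, 15, 18, 22, 25, 30, 31, 33, 40, 41]  # file sizes
      h2 = [11, 13, 15, 16, 19, 23, 26, 29, 32, 34, 39, 42]
      print(np.linalg.norm(lpc(h1, 4) - lpc(h2, 4)))  # small -> similar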
  62. [62]M. D. Penta, M. Neteler, G. Antoniol, and E. Merlo, “A language-independent software renovation framework,” Journal of Systems and Software, vol. 77, no. 3, pp. 225–240, 2005.
    Bibtex
      @article{journals/jss/PentaNAM05,
      author = {Penta, Massimiliano Di and Neteler, Markus and Antoniol, Giuliano and Merlo, Ettore},
      title = {A language-independent software renovation framework},
      journal = {Journal of Systems and Software},
      volume = {77},
      number = {3},
      year = {2005},
      pages = {225-240},
      ee = {http://dx.doi.org/10.1016/j.jss.2004.03.033},
      bibsource = {DBLP, http://dblp.uni-trier.de}
    }
    
      
  63. [63]G. Antoniol, M. D. Penta, H. Gall, and M. Pinzger, “Towards the Integration of Versioning Systems, Bug Reports and Source Code Meta-Models,” Electr. Notes Theor. Comput. Sci., vol. 127, no. 3, pp. 87–99, 2005.
    Bibtex
      @article{journals/entcs/AntoniolPGP05,
      author = {Antoniol, Giuliano and Penta, Massimiliano Di and Gall, Harald and Pinzger, Martin},
      title = {Towards the Integration of Versioning Systems, Bug Reports and Source Code Meta-Models},
      journal = {Electr. Notes Theor. Comput. Sci.},
      volume = {127},
      number = {3},
      year = {2005},
      pages = {87-99},
      ee = {http://dx.doi.org/10.1016/j.entcs.2004.08.036},
      bibsource = {DBLP, http://dblp.uni-trier.de}
    }
    
      
  64. [64]G. Antoniol, A. Cimitile, G. A. D. Lucca, and M. D. Penta, “Assessing Staffing Needs for a Software Maintenance Project through Queuing Simulation,” IEEE Trans. Software Eng., vol. 30, no. 1, pp. 43–58, 2004.
    Bibtex
      @article{01265735,
      author = {Antoniol, Giuliano and Cimitile, Aniello and Lucca, Giuseppe A. Di and Penta, Massimiliano Di},
      title = {Assessing Staffing Needs for a Software Maintenance Project through Queuing Simulation},
      journal = {IEEE Trans. Software Eng.},
      volume = {30},
      number = {1},
      year = {2004},
      pages = {43-58},
      ee = {http://doi.ieeecomputersociety.org/10.1109/TSE.2004.1265735},
      bibsource = {DBLP, http://dblp.uni-trier.de},
      pdf = {2004/01265735.pdf}
    }
    
      
    Abstract We present an approach based on queuing theory and stochastic simulation to help planning, managing, and controlling the project staffing and the resulting service level in distributed multiphase maintenance processes. Data from a Y2K massive maintenance intervention on a large COBOL/JCL financial software system were used to simulate and study different service center configurations for a geographically distributed software maintenance project. In particular, a monolithic configuration corresponding to the customer’s point-of-view and more fine-grained configurations, accounting for different process phases as well as for rework, were studied. The queuing theory and stochastic simulation provided a means to assess staffing, evaluate service level, and assess the likelihood to meet the project deadline while executing the project. It turned out to be an effective staffing tool for managers, provided that it is complemented with other project-management tools, in order to prioritize activities, avoid conflicts, and check the availability of resources.
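    The analytic core of such staffing questions can be previewed with a textbook M/M/c model, a far simpler stand-in for the paper's multiphase simulation; arrival and service rates below are invented.

      # Erlang-C: probability a change request waits, and mean queue wait,
      # as a function of team size c.
      from math import factorial

      def erlang_c(lam, mu, c):
          a = lam / mu                           # offered load
          rho = a / c                            # utilization, must be < 1
          top = a**c / (factorial(c) * (1 - rho))
          return top / (sum(a**k / factorial(k) for k in range(c)) + top)

      lam, mu = 8.0, 1.0                         # requests/day, service rate
      for c in range(9, 13):
          pw = erlang_c(lam, mu, c)
          wq = pw / (c * mu - lam)               # mean wait in queue
          print(f"staff={c}  P(wait)={pw:.2f}  mean wait={wq:.2f} days")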
  65. [65]G. Antoniol, S. Gradara, and G. Venturi, “Methodological issues in a CMM Level 4 implementation,” Software Process: Improvement and Practice, vol. 9, no. 1, pp. 33–50, 2004.
    Bibtex
      @article{journals/sopr/AntoniolGV04,
      author = {Antoniol, Giuliano and Gradara, Sara and Venturi, Gabriele},
      title = {Methodological issues in a CMM Level 4 implementation},
      journal = {Software Process: Improvement and Practice},
      volume = {9},
      number = {1},
      year = {2004},
      pages = {33-50},
      ee = {http://dx.doi.org/10.1002/spip.183},
      bibsource = {DBLP, http://dblp.uni-trier.de}
    }
    
      
    Abstract The Capability Maturity Model (CMM) developed by the Software Engineering Institute is an improvement paradigm. It provides a framework for assessing the maturity of software processes on a five level scale, and guidelines which help to improve software process and artifact quality. Moving towards CMM Level 4 and Level 5 is a very demanding task even for large software companies already accustomed to the CMM and ISO certifications. It requires, for example, quality monitoring, control, feedback, and process optimization. In fact, going beyond CMM Level 3 requires a radical change in the way projects are carried out and managed. It involves quantitative and statistical techniques to control software processes and quality, and it entails substantial changes in the way the organization approaches software life cycle activities. In this paper we describe the process changes, adaptation, integration and tailoring, and we report lessons learned while preparing an Italian solution centre of EDS for the Level 4 internal assessment. The solution centre has about 350 people and carries out about 40 software development and maintenance projects each year. We describe how Level 4 Key Process Areas have been implemented, building a methodological framework which leverages both existing available methodologies and practices already in place (e.g., derived from ISO compliance). We discuss how methodologies have been adapted to the company’s internal and external situation and what the underlying assumptions for the methodology adaptation are. Furthermore, we discuss cultural and organizational changes required to obtain a CMM Level 4 certification. The steps and the process improvements we have carried out, and the challenges we have faced, were most likely those with the highest risk and cost driving factors common to all organizations aiming at achieving CMM Level 4.
  66. [66]G. Antoniol, M. D. Penta, G. Masone, and U. Villano, “Compiler Hacking for Source Code Analysis,” Software Quality Journal, vol. 12, no. 4, pp. 383–406, 2004.
    Bibtex
      @article{journals/sqj/AntoniolPMV04,
      author = {Antoniol, Giuliano and Penta, Massimiliano Di and Masone, Gianluca and Villano, Umberto},
      title = {Compiler Hacking for Source Code Analysis},
      journal = {Software Quality Journal},
      volume = {12},
      number = {4},
      year = {2004},
      pages = {383-406},
      ee = {http://dx.doi.org/10.1023/B:SQJO.0000039794.29432.7e},
      bibsource = {DBLP, http://dblp.uni-trier.de}
    }
    
      
    Abstract Many activities related to software quality assessment and improvement, such as empirical model construction, data flow analysis, testing or reengineering, rely on static source code analysis as the first and fundamental step for gathering the necessary input information. In the past, two different strategies have been adopted to develop tool suites. There are tools encompassing or implementing the source parse step, where the parser is internal to the toolkit, and is developed and maintained with it. A different approach builds tools on the top of external already-available components such as compilers that output the program abstract syntax tree, or that make it available via an API. This paper discusses techniques, issues and challenges linked to compiler patching or wrapping for analysis purposes. In particular, different approaches for accessing the compiler parsing information are compared, and the techniques used to decouple the parsing front end from the analysis modules are discussed. Moreover, the paper presents an approach and a tool, XOgastan, developed exploiting the gcc/g++ ability to save a representation of the intermediate abstract syntax tree. XOgastan translates the gcc/g++ dumped abstract syntax tree format into a Graph eXchange Language representation, which makes it possible to take advantage of currently available XML tools for any subsequent analysis step. The tool is illustrated and its design discussed, showing its architecture and the main implementation choices made.
  67. [67]G. Antoniol, R. Fiutem, and C. J. Lokan, “Object-Oriented Function Points: An Empirical Validation,” Empirical Software Engineering, vol. 8, no. 3, pp. 225–254, 2003.
    Bibtex
      @article{journals/ese/AntoniolFL03,
      author = {Antoniol, Giuliano and Fiutem, Roberto and Lokan, Christopher J.},
      title = {Object-Oriented Function Points: An Empirical Validation},
      journal = {Empirical Software Engineering},
      volume = {8},
      number = {3},
      year = {2003},
      pages = {225-254},
      ee = {http://dx.doi.org/10.1023/A:1024472727275},
      bibsource = {DBLP, http://dblp.uni-trier.de}
    }
    
      
    Abstract We present an empirical validation of Object-Oriented size estimation models. In previous work we proposed Object Oriented Function Points (OOFP), an adaptation of the Function Points approach to Object-Oriented systems. In a small pilot study we used the OOFP method to estimate LOC. In this paper we extend the empirical validation of OOFP substantially, using a larger data set and comparing OOFP with alternative predictors of LOC. The aim of the paper is to gain an understanding of which factors contribute to accurate size prediction for OO software and to position OOFP within that knowledge. A cross-validation approach was adopted to build and evaluate linear models where the independent variable was either a traditional OO entity (classes, methods, associations, inheritance, or a combination of them) or an OOFP-related measure. Using the full OOFP process, the best size predictor achieved a normalized mean squared error of 38%. By removing Function Point weighting tables from the OOFP process and carefully analyzing collected data points and developer practices, we identified several factors that influence size estimation. Our empirical evidence demonstrates that by controlling these factors size estimates could be substantially improved, decreasing the normalized mean squared error to 15%, in relative terms a 56% reduction.
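    The evaluation scheme (cross-validated linear models scored by normalized mean squared error) is easy to sketch; the data below are invented, and NMSE is taken here as mean squared error over the variance of the response.

      # Leave-one-out linear regression: x = an OOFP-like size measure,
      # y = LOC; report NMSE of the held-out predictions.
      import numpy as np

      x = np.array([120, 200, 90, 310, 150, 260, 180, 220], dtype=float)
      y = np.array([5000, 8200, 3600, 13000, 6100, 10800, 7400, 9000],
                   dtype=float)

      errors = []
      for i in range(len(x)):
          mask = np.arange(len(x)) != i          # hold out one project
          slope, intercept = np.polyfit(x[mask], y[mask], 1)
          errors.append((y[i] - (slope * x[i] + intercept)) ** 2)

      print(f"NMSE = {np.mean(errors) / np.var(y):.2f}")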
  68. [68]G. Antoniol, U. Villano, E. Merlo, and M. D. Penta, “Analyzing cloning evolution in the Linux kernel,” Information & Software Technology, vol. 44, no. 13, pp. 755–765, 2002.
    Bibtex
      @article{1s20S0950584902001234main,
      author = {Antoniol, Giuliano and Villano, Umberto and Merlo, Ettore and Penta, Massimiliano Di},
      title = {Analyzing cloning evolution in the Linux kernel},
      journal = {Information {\&} Software Technology},
      volume = {44},
      number = {13},
      year = {2002},
      pages = {755-765},
      ee = {http://dx.doi.org/10.1016/S0950-5849(02)00123-4},
      bibsource = {DBLP, http://dblp.uni-trier.de},
      pdf = {2002/1-s2.0-S0950584902001234-main.pdf}
    }
    
      
    Abstract Identifying code duplication in large multi-platform software systems is a challenging problem. This is due to a variety of reasons, including the presence of high-level programming languages and structures interleaved with hardware-dependent low-level resources and assembler code, the use of GUI-based configuration scripts generating commands to compile the system, and the extremely high number of possible different configurations. This paper studies the extent and the evolution of code duplications in the Linux kernel. Linux is a large multi-platform software system; it is based on the Open Source concept, and so there are no obstacles to discussing its implementation. In addition, it is decidedly too large to be examined manually: the current Linux kernel release (2.4.18) is about three million LOCs.
  69. [69]G. Antoniol, G. Canfora, G. Casazza, A. D. Lucia, and E. Merlo, “Recovering Traceability Links between Code and Documentation,” IEEE Trans. Software Eng., vol. 28, no. 10, pp. 970–983, 2002.
    Bibtex
      @article{journals/tse/AntoniolCCLM02,
      author = {Antoniol, Giuliano and Canfora, Gerardo and Casazza, Gerardo and Lucia, Andrea De and Merlo, Ettore},
      title = {Recovering Traceability Links between Code and Documentation},
      journal = {IEEE Trans. Software Eng.},
      volume = {28},
      number = {10},
      year = {2002},
      pages = {970-983},
      ee = {http://doi.ieeecomputersociety.org/10.1109/TSE.2002.1041053},
      bibsource = {DBLP, http://dblp.uni-trier.de}
    }
    
      
    Abstract Software system documentation is almost always expressed informally in natural language and free text. Examples include requirement specifications, design documents, manual pages, system development journals, error logs, and related maintenance reports. We propose a method based on information retrieval to recover traceability links between source code and free text documents. A premise of our work is that programmers use meaningful names for program items, such as functions, variables, types, classes, and methods. We believe that the application-domain knowledge that programmers process when writing the code is often captured by the mnemonics for identifiers; therefore, the analysis of these mnemonics can help to associate high-level concepts with program concepts, and vice versa. We apply both a probabilistic and a vector space information retrieval model in two case studies to trace C++ source code onto manual pages and Java code onto functional requirements. We compare the results of applying the two models, discuss the benefits and limitations, and describe directions for improvements.
  70. [70]G. Antoniol, B. Caprile, A. Potrich, and P. Tonella, “Design-code traceability recovery: selecting the basic linkage properties,” Sci. Comput. Program., vol. 40, no. 2-3, pp. 213–234, 2001.
    Bibtex
      @article{journals/scp/AntoniolCPT01,
      author = {Antoniol, Giuliano and Caprile, Bruno and Potrich, Alessandra and Tonella, Paolo},
      title = {Design-code traceability recovery: selecting the basic linkage properties},
      journal = {Sci. Comput. Program.},
      volume = {40},
      number = {2-3},
      year = {2001},
      pages = {213-234},
      ee = {http://dx.doi.org/10.1016/S0167-6423(01)00016-8},
      bibsource = {DBLP, http://dblp.uni-trier.de}
    }
    
      
    Abstract Traceability ensures that software artifacts of subsequent phases of the development cycle are consistent. Few works have so far addressed the problem of automatically recovering traceability links between object oriented (OO) design and code entities. Such a recovery process is required whenever there is no explicit support to traceability from the development process. The recovered information can drive the evolution of the available design so that it corresponds to the code, thus providing a still useful and updated high level view of the system. Automatic recovery of traceability links can be achieved by determining the similarity of paired elements from design and code. The choice of the properties involved in the similarity computation is crucial for the success of the recovery process. In fact, design and code objects are complex artifacts with several properties attached. The basic anchors of the recovered traceability links should be chosen as those properties (or property combinations) which are expected to be maintained during the transformation of design into code. In this paper, different categories of basic properties of design and code entities will be evaluated with respect to the contribution they give to traceability recovery. Several industrial software components will be employed as a benchmark on which the performance of the alternatives is measured.
  71. [71]G. Antoniol, G. Casazza, M. D. Penta, and R. Fiutem, “Object-oriented design patterns recovery,” Journal of Systems and Software, vol. 59, no. 2, pp. 181–196, 2001.
    Bibtex
      @article{journals/jss/AntoniolCPF01,
      author = {Antoniol, Giuliano and Casazza, Gerardo and Penta, Massimiliano Di and Fiutem, Roberto},
      title = {Object-oriented design patterns recovery},
      journal = {Journal of Systems and Software},
      volume = {59},
      number = {2},
      year = {2001},
      pages = {181-196},
      ee = {http://dx.doi.org/10.1016/S0164-1212(01)00061-9},
      bibsource = {DBLP, http://dblp.uni-trier.de}
    }
    
      
    Abstract Object-Oriented design patterns are an emergent technology: they are reusable micro-architectures, high-level building blocks. A system which has been designed using well-known, documented, and accepted design patterns is also likely to exhibit good properties such as modularity, separation of concerns, and maintainability. While for forward engineering the benefits of using design patterns are clear, using reverse engineering technologies to discover instances of patterns in a software artifact (e.g. design or code) may help in several key areas, among which are program understanding, design-to-code traceability, and quality assessment. This paper describes a conservative approach and experimental results, based on a multi-stage reduction strategy using OO software metrics and structural properties, to extract structural design patterns from OO design or C++ code. To assess the effectiveness of the pattern recovery approach, a process and a portable tool suite written in Java, remotely accessible by means of any Web browser, have been developed. The developed system and experimental results on eight industrial software systems (design and code) and 200,000 lines of public domain C++ code are presented.
  72. [72]F. Calzolari, P. Tonella, and G. Antoniol, “Maintenance and testing effort modeled by linear and nonlinear dynamic systems,” Information & Software Technology, vol. 43, no. 8, pp. 477–486, 2001.
    Bibtex
      @article{1s20S0950584901001562main,
      author = {Calzolari, F. and Tonella, Paolo and Antoniol, Giuliano},
      title = {Maintenance and testing effort modeled by linear and nonlinear dynamic systems},
      journal = {Information {\&} Software Technology},
      volume = {43},
      number = {8},
      year = {2001},
      pages = {477-486},
      ee = {http://dx.doi.org/10.1016/S0950-5849(01)00156-2},
      bibsource = {DBLP, http://dblp.uni-trier.de},
      pdf = {2001/1s20S0950584901001562main.pdf}
    }
    
      
    Abstract Maintenance and testing activities (conducted, respectively, on the release currently in use/to be delivered) absorb most of the total lifetime cost of software development. Such economic relevance suggests investigating the maintenance and testing processes to find models allowing software engineers to better estimate, plan and manage costs and activities. Ecological systems in which predators and prey compete for survival were investigated by applying suitable mathematical models. An analogy can be drawn between biological prey and software defects, and between predators and programmers. In fact, when programmers start trying to recognize and correct code defects, while the number of residual defects decreases, the effort spent to find any new defect has an initial increase, followed by a decline when almost all defects are removed, similar to prey and predator populations. This paper proposes to describe the evolution of the maintenance and testing effort by means of the predator–prey dynamic model. The applicability of the model is supported by the experimental data about two real world projects. The fit of the model when parameters are estimated on all available data is high, and accurate predictions can be obtained when an initial segment of the available data is used for parameter estimation.
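    The predator-prey analogy refers to the classic Lotka-Volterra equations; a forward-Euler integration shows the cyclic shape the abstract alludes to, with invented parameters, prey read as residual defects, and predators as testing effort.

      # Lotka-Volterra dynamics: defects (prey) vs. testing effort (predators).
      alpha, beta, delta, gamma = 0.1, 0.02, 0.01, 0.1
      defects, effort = 40.0, 9.0
      dt = 0.1
      for step in range(1001):
          if step % 200 == 0:
              print(f"t={step * dt:5.1f}  defects={defects:6.1f}  effort={effort:6.1f}")
          d_defects = (alpha * defects - beta * defects * effort) * dt
          d_effort = (delta * defects * effort - gamma * effort) * dt
          defects += d_defects
          effort += d_effort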
  73. [73]P. Tonella and G. Antoniol, “Inference of object-oriented design patterns,” Journal of Software Maintenance, vol. 13, no. 5, pp. 309–330, 2001.
    Bibtex
      @article{journals/smr/TonellaA01,
      author = {Tonella, Paolo and Antoniol, Giuliano},
      title = {Inference of object-oriented design patterns},
      journal = {Journal of Software Maintenance},
      volume = {13},
      number = {5},
      year = {2001},
      pages = {309-330},
      ee = {http://dx.doi.org/10.1002/smr.235},
      bibsource = {DBLP, http://dblp.uni-trier.de}
    }
    
      
  74. [74]G. Antoniol, G. Canfora, G. Casazza, and A. D. Lucia, “Maintaining traceability links during object-oriented software evolution,” Softw., Pract. Exper., vol. 31, no. 4, pp. 331–355, 2001.
    Bibtex
      @article{journals/spe/AntoniolCCL01,
      author = {Antoniol, Giuliano and Canfora, Gerardo and Casazza, Gerardo and Lucia, Andrea De},
      title = {Maintaining traceability links during object-oriented software evolution},
      journal = {Softw., Pract. Exper.},
      volume = {31},
      number = {4},
      year = {2001},
      pages = {331-355},
      ee = {http://dx.doi.org/10.1002/spe.374},
      bibsource = {DBLP, http://dblp.uni-trier.de}
    }
    
      
    Abstract This paper presents a method to build and maintain traceability links and properties of a set of OO software releases. The method recovers an “as is” design from C++ software releases, compares recovered designs at the class interface level, and helps the user to deal with inconsistencies by pointing out regions of code where differences are concentrated. The comparison step exploits edit distance and a maximum match algorithm. The method has been experimented with on two freely available C++ systems. Results, as well as examples of applications to the visualization of the traceability information and to the estimation of the size of changes during maintenance, are reported in the paper.
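    The edit distance underlying the comparison step is the standard Levenshtein distance; a compact version on class identifiers, with invented names:

      # Levenshtein edit distance with a rolling row; a lower distance means
      # a stronger design-to-code class match candidate.
      def edit_distance(a: str, b: str) -> int:
          prev = list(range(len(b) + 1))
          for i, ca in enumerate(a, 1):
              cur = [i]
              for j, cb in enumerate(b, 1):
                  cur.append(min(prev[j] + 1,                # deletion
                                 cur[j - 1] + 1,             # insertion
                                 prev[j - 1] + (ca != cb)))  # substitution
              prev = cur
          return prev[-1]

      print(edit_distance("BookmarkManager", "BookmarksMngr"))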
  75. [75]G. Antoniol, B. Caprile, A. Potrich, and P. Tonella, “Design-code traceability for object-oriented systems,” Ann. Software Eng., vol. 9, pp. 35–58, 2000.
    Bibtex
      @article{art3A1010232FA3A1018916522804,
      author = {Antoniol, Giuliano and Caprile, Bruno and Potrich, Alessandra and Tonella, Paolo},
      title = {Design-code traceability for object-oriented systems},
      journal = {Ann. Software Eng.},
      volume = {9},
      year = {2000},
      pages = {35-58},
      ee = {http://dx.doi.org/10.1023/A:1018916522804},
      bibsource = {DBLP, http://dblp.uni-trier.de},
      pdf = {2000/art3A1010232FA3A1018916522804.pdf}
    }
    
      
    Abstract Traceability is a key issue to ensure consistency among software artifacts of subsequent phases of the development cycle. However, few works have so far addressed the theme of tracing object oriented (OO) design into its implementation and evolving it. This paper presents an approach to checking the compliance of OO design with respect to source code and to supporting its evolution. The process works on design artifacts expressed in the OMT (Object Modeling Technique) notation and accepts C++ source code. It recovers an “as is” design from the code, compares the recovered design with the actual design and helps the user to deal with inconsistencies. The recovery process exploits the edit distance computation and the maximum match algorithm to determine traceability links between design and code. The output is a similarity measure associated to design‐code class pairs, which can be classified as matched and unmatched by means of a maximum likelihood threshold. A graphic display of the design with different green levels associated to different levels of match and red for the unmatched classes is provided as a support to update the design and improve its traceability to the code.
  76. [76]P. Tonella, G. Antoniol, R. Fiutem, and F. Calzolari, “Reverse engineering 4.7 million lines of code,” Softw., Pract. Exper., vol. 30, no. 2, pp. 129–150, 2000.
    Bibtex
      @article{journals/spe/TonellaAFC00,
      author = {Tonella, Paolo and Antoniol, Giuliano and Fiutem, Roberto and Calzolari, F.},
      title = {Reverse engineering 4.7 million lines of code},
      journal = {Softw., Pract. Exper.},
      volume = {30},
      number = {2},
      year = {2000},
      pages = {129-150},
      ee = {http://dx.doi.org/10.1002/(SICI)1097-024X(200002)30:2<129::AID-SPE293>3.0.CO;2-M},
      bibsource = {DBLP, http://dblp.uni-trier.de}
    }
    
      
    Abstract The ITC-Irst Reverse Engineering group was charged with analyzing a software application of approximately 4.7 million lines of C code. It was an old legacy system, maintained for a long time, on which several successive adaptive and corrective maintenance interventions had led to the degradation of the original structure. The company decided to re-engineer the software instead of replacing it, because the complexity and costs of re-implementing the application from scratch could not be afforded, and the associated risk could not be run. Several problems were encountered during re-engineering, including identifying dependencies and detecting redundant functions that were not used anymore. To accomplish these goals, we adopted a conservative approach. Before performing any kind of analysis on the whole code, we carefully evaluated the expected costs. To this aim, a small but representative sample of modules was preliminarily analyzed, and the costs and outcomes were extrapolated so as to obtain some indications on the analysis of the whole system. When the results of the sample modules were found to be useful as well as affordable for the entire system, the resources involved were carefully distributed among the different reverse engineering tasks to meet the customer’s deadline. This paper summarizes that experience, discussing how we approached the problem, the way we managed the limited resources available to complete the task within the assigned deadlines, and the lessons we learned.
  77. [77]R. Fiutem, P. Tonella, G. Antoniol, and E. Merlo, “Points-to analysis for program understanding,” Journal of Systems and Software, vol. 44, no. 3, pp. 213–227, 1999.
    Bibtex
      @article{journals/jss/FiutemTAM99,
      author = {Fiutem, Roberto and Tonella, Paolo and Antoniol, Giuliano and Merlo, Ettore},
      title = {Points-to analysis for program understanding},
      journal = {Journal of Systems and Software},
      volume = {44},
      number = {3},
      year = {1999},
      pages = {213-227},
      ee = {http://dx.doi.org/10.1016/S0164-1212(98)10058-4},
      bibsource = {DBLP, http://dblp.uni-trier.de}
    }
    
      
    Abstract Program understanding activities are more difficult for programs written in languages (such as C) that heavily make use of pointers for data structure manipulation, because the programmer needs to build a mental model of the memory use and of the pointers to its locations. Pointers also pose additional problems to the tools supporting program understanding, since they introduce additional dependences that have to be accounted for. This paper extends the flow-insensitive, context-insensitive points-to analysis algorithm proposed by Steensgaard to cover arbitrary combinations of pointer dereferences, array subscripts, and field selections. It exhibits interesting properties, among which are scalability, resulting from its low complexity, and good performance. The results of the analysis are valuable by themselves, as their graphical display represents the points-to links between locations. They are also integrated with other program understanding techniques, e.g. call graph construction, slicing, plan recognition, and architectural recovery. The use of this algorithm in the framework of the program understanding environment CANTO is discussed.
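    To give a flavor of the Steensgaard-style analysis being extended: pointer assignments unify points-to targets in near-linear time with union-find. The toy statements below are invented, and a full implementation would also re-key target classes when pointer classes merge.

      # Unification-based (flow- and context-insensitive) points-to sketch.
      parent = {}

      def find(x):
          parent.setdefault(x, x)
          while parent[x] != x:
              parent[x] = parent[parent[x]]      # path halving
              x = parent[x]
          return x

      def union(x, y):
          parent[find(x)] = find(y)

      points_to = {}                             # pointer class -> target class

      def assign_addr(p, loc):                   # p = &loc
          c = find(p)
          if c in points_to:
              union(points_to[c], loc)
          else:
              points_to[c] = find(loc)

      def assign_ptr(p, q):                      # p = q
          pc, qc = find(p), find(q)
          if pc in points_to and qc in points_to:
              union(points_to[pc], points_to[qc])
          elif qc in points_to:
              points_to[pc] = points_to[qc]
          elif pc in points_to:
              points_to[qc] = points_to[pc]

      assign_addr("p", "x")
      assign_addr("q", "y")
      assign_ptr("p", "q")                       # merges targets of p and q
      print(find("x") == find("y"))              # True: x and y unified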
  78. [78]P. Tonella, G. Antoniol, R. Fiutem, and E. Merlo, “Variable-precision reaching definitions analysis,” Journal of Software Maintenance, vol. 11, no. 2, pp. 117–142, 1999.
    Bibtex
      @article{journals/smr/TonellaAFM99,
      author = {Tonella, Paolo and Antoniol, Giuliano and Fiutem, Roberto and Merlo, Ettore},
      title = {Variable-precision reaching definitions analysis},
      journal = {Journal of Software Maintenance},
      volume = {11},
      number = {2},
      year = {1999},
      pages = {117-142},
      ee = {http://dx.doi.org/10.1002/(SICI)1096-908X(199903/04)11:2<117::AID-SMR185>3.0.CO;2-P},
      bibsource = {DBLP, http://dblp.uni-trier.de}
    }
    
      
  79. [79]R. Fiutem, G. Antoniol, P. Tonella, and E. Merlo, “ART: an architectural reverse engineering environment,” Journal of Software Maintenance, vol. 11, no. 5, pp. 339–364, 1999.
    Bibtex
      @article{journals/smr/FiutemATM99,
      author = {Fiutem, Roberto and Antoniol, Giuliano and Tonella, Paolo and Merlo, Ettore},
      title = {ART: an architectural reverse engineering environment},
      journal = {Journal of Software Maintenance},
      volume = {11},
      number = {5},
      year = {1999},
      pages = {339-364},
      ee = {http://dx.doi.org/10.1002/(SICI)1096-908X(199909/10)11:5<339::AID-SMR196>3.0.CO;2-I},
      bibsource = {DBLP, http://dblp.uni-trier.de}
    }
    
      
    Abstract When programmers perform maintenance tasks, program understanding is often required. One of the first activities in understanding a software system is identifying its subsystems and their relations, i.e. its software architecture. Since a large part of the effort is spent in creating a mental model of the system under study, tools can help maintainers in managing the evolution of legacy systems by showing them architectural information. This paper describes an environment for the architectural recovery of software systems called Architectural Recovery Tool (ART). The environment is based on a hierarchical architectural model that drives the application of a set of recognizers, each producing a different architectural view of a system or of some of its parts. Recognizers embody knowledge about architectural cliches and use flow analysis techniques to make their output more accurate. To test the accuracy and effectiveness of ART, a suite of public domain applications containing interesting architectural organizations was selected as a benchmark. Results are presented by showing ART performance in terms of precision and recall of the architectural concept retrieval process. The results obtained show that cliche-based architectural recovery is feasible and the recovered information can be a valuable support in reengineering and maintenance activities.
  80. [80]G. Antoniol, C. J. Lokan, G. Caldiera, and R. Fiutem, “A Function Point-Like Measure for Object-Oriented Software,” Empirical Software Engineering, vol. 4, no. 3, pp. 263–287, 1999.
    Bibtex
      @article{journals/ese/AntoniolLCF99,
      author = {Antoniol, Giuliano and Lokan, Christopher J. and Caldiera, Gianluigi and Fiutem, Roberto},
      title = {A Function Point-Like Measure for Object-Oriented Software},
      journal = {Empirical Software Engineering},
      volume = {4},
      number = {3},
      year = {1999},
      pages = {263-287},
      bibsource = {DBLP, http://dblp.uni-trier.de}
    }
    
      
    Abstract We present a method for estimating the size, and consequently the effort and duration, of object oriented software development projects. Different estimates may be made in different phases of the development process, according to the available information. We define an adaptation of traditional function points, called Object Oriented Function Points, to enable the measurement of object oriented analysis and design specifications. Tools have been constructed to automate the counting method. The novel aspect of our method is its flexibility. An organisation can experiment with different counting policies to find the most accurate predictors of size, effort, etc. in its environment. The method and preliminary results of its application in an industrial environment are presented and discussed.
  81. [81]M. Federico, M. Cettolo, F. Brugnara, and G. Antoniol, “Language modelling for efficient beam-search,” Computer Speech & Language, vol. 9, no. 4, pp. 353–379, 1995.
    Bibtex
      @article{journals/csl/FedericoCBA95,
      author = {Federico, Marcello and Cettolo, Mauro and Brugnara, Fabio and Antoniol, Giuliano},
      title = {Language modelling for efficient beam-search},
      journal = {Computer Speech {\&} Language},
      volume = {9},
      number = {4},
      year = {1995},
      pages = {353-379},
      ee = {http://dx.doi.org/10.1006/csla.1995.0017},
      bibsource = {DBLP, http://dblp.uni-trier.de}
    }
    
      
    Abstract This paper considers the problem of estimating bigram language models and of efficiently representing them by a finite state network which can be employed by an HMM-based beam-search continuous speech recognizer. A review of the best known bigram estimation techniques is given, together with a description of the original Stacked model. LM comparisons in terms of perplexity are given for three text corpora with different data sparseness conditions, while speech recognition accuracy tests are presented for a 10,000-word real-time speaker-independent dictation task. The Stacked estimation method compares favorably with the best ones by achieving about 93% word accuracy. If better language model estimates can improve recognition accuracy, representations better suited to the search algorithm can improve its speed as well. Two static representations of language models are introduced: linear and tree-based. Results show that the latter organization is better exploited by the beam-search algorithm, as it provides five times faster response with the same accuracy. Finally, an off-line reduction algorithm is presented that cuts the space requirements of the tree-based topology to about 40%. The solutions proposed here were successfully employed in a real-time, speaker-independent, 10,000-word dictation system for radiological reporting.
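    A toy bigram model with add-one smoothing shows the estimation and perplexity side of the paper in miniature; the corpus below is an invented fragment, loosely echoing the radiological-reporting domain.

      # Bigram LM: add-one smoothing, perplexity of a held-out sentence.
      from collections import Counter
      from math import log2

      corpus = "the scan shows no lesion . the scan is normal .".split()
      unigrams = Counter(corpus)
      bigrams = Counter(zip(corpus, corpus[1:]))
      V = len(unigrams)

      def p(w2, w1):                             # P(w2 | w1), smoothed
          return (bigrams[(w1, w2)] + 1) / (unigrams[w1] + V)

      test = "the scan is normal .".split()
      logprob = sum(log2(p(w2, w1)) for w1, w2 in zip(test, test[1:]))
      print(f"perplexity = {2 ** (-logprob / (len(test) - 1)):.1f}")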