By years


2022

  1. [1]F. Tambon et al., “How to certify machine learning based safety-critical systems? A systematic literature review,” Automated Software Engineering, vol. 29, Nov. 2022, doi: 10.1007/s10515-022-00337-x.
    Bibtex
      @article{article,
      author = {Tambon, Florian and Laberge, Gabriel and An, Le and Nikanjam, Amin and Mindom, Paulina and Pequignot, Yann and Khomh, Foutse and Antoniol, Giulio and Merlo, Ettore and Laviolette, Francois},
      year = {2022},
      month = nov,
      pages = {},
      title = {How to certify machine learning based safety-critical systems? A systematic literature review},
      volume = {29},
      journal = {Automated Software Engineering},
      doi = {10.1007/s10515-022-00337-x}
    }
    
      
  2. [2]B. A. Muse, C. Nagy, A. Cleve, F. Khomh, and G. Antoniol, “FIXME: Synchronize with Database - An Empirical Study of Data Access Self-Admitted Technical Debt,” Empirical Software Engineering, 2022.
    Bibtex
      @article{ed10dadd69224e058a193608d779f0b2,
      title = {FIXME: Synchronize with Database - An Empirical Study of Data Access Self-Admitted Technical Debt},
      author = {Asmare-Muse, Biruk and Nagy, Csaba and Cleve, Anthony and Khomh, Foutse and Antoniol, Giuliano},
      year = {2022},
      language = {English},
      journal = {Empirical Software Engineering },
      issn = {1382-3256},
      publisher = {Springer}
    }
    
      
  3. [3]F. Zampetti, S. Mudbhari, V. Arnaoudova, M. D. Penta, S. Panichella, and G. Antoniol, “Using code reviews to automatically configure static analysis tools,” Empir. Softw. Eng., vol. 27, no. 1, p. 28, 2022, doi: 10.1007/s10664-021-10076-4.
    Bibtex
      @article{DBLP:journals/ese/ZampettiMAPPA22,
      author = {Zampetti, Fiorella and Mudbhari, Saghan and Arnaoudova, Venera and Penta, Massimiliano Di and Panichella, Sebastiano and Antoniol, Giuliano},
      title = {Using code reviews to automatically configure static analysis tools},
      journal = {Empir. Softw. Eng.},
      volume = {27},
      number = {1},
      pages = {28},
      year = {2022},
      url = {https://doi.org/10.1007/s10664-021-10076-4},
      doi = {10.1007/s10664-021-10076-4},
      timestamp = {Sat, 25 Dec 2021 15:51:52 +0100},
      biburl = {https://dblp.org/rec/journals/ese/ZampettiMAPPA22.bib},
      bibsource = {dblp computer science bibliography, https://dblp.org}
    }
    
      
  4. [4]C. Coviello, S. Romano, G. Scanniello, and G. Antoniol, “GASSER: A Multi-Objective Evolutionary Approach for Test Suite Reduction,” Int. J. Softw. Eng. Knowl. Eng., vol. 32, no. 2, pp. 193–225, 2022, doi: 10.1142/S0218194022500085.
    Bibtex
      @article{DBLP:journals/ijseke/CovielloRSA22,
      author = {Coviello, Carmen and Romano, Simone and Scanniello, Giuseppe and Antoniol, Giuliano},
      title = {GASSER: A Multi-Objective Evolutionary Approach for Test Suite
                        Reduction},
      journal = {Int. J. Softw. Eng. Knowl. Eng.},
      volume = {32},
      number = {2},
      pages = {193--225},
      year = {2022},
      url = {https://doi.org/10.1142/S0218194022500085},
      doi = {10.1142/S0218194022500085},
      timestamp = {Wed, 07 Dec 2022 23:05:18 +0100},
      biburl = {https://dblp.org/rec/journals/ijseke/CovielloRSA22.bib},
      bibsource = {dblp computer science bibliography, https://dblp.org}
    }
    
      
  5. [5]D. Humeniuk, F. Khomh, and G. Antoniol, “A search-based framework for automatic generation of testing environments for cyber-physical systems,” Inf. Softw. Technol., vol. 149, p. 106936, 2022, doi: 10.1016/j.infsof.2022.106936.
    Bibtex
      @article{DBLP:journals/infsof/HumeniukKA22,
      author = {Humeniuk, Dmytro and Khomh, Foutse and Antoniol, Giuliano},
      title = {A search-based framework for automatic generation of testing environments
                        for cyber-physical systems},
      journal = {Inf. Softw. Technol.},
      volume = {149},
      pages = {106936},
      year = {2022},
      url = {https://doi.org/10.1016/j.infsof.2022.106936},
      doi = {10.1016/J.INFSOF.2022.106936},
      timestamp = {Tue, 16 Aug 2022 23:05:36 +0200},
      biburl = {https://dblp.org/rec/journals/infsof/HumeniukKA22.bib},
      bibsource = {dblp computer science bibliography, https://dblp.org}
    }
    
      
  6. [6]F. Zampetti, F. Belias, C. Zid, G. Antoniol, and M. D. Penta, “An Empirical Study on the Fault-Inducing Effect of Functional Constructs in Python,” in IEEE International Conference on Software Maintenance and Evolution, ICSME 2022, Limassol, Cyprus, October 3-7, 2022, pp. 47–58, doi: 10.1109/ICSME55016.2022.00013.
    Bibtex
      @inproceedings{DBLP:conf/icsm/ZampettiBZAP22,
      author = {Zampetti, Fiorella and Belias, Fran{\c{c}}ois and Zid, Cyrine and Antoniol, Giuliano and Penta, Massimiliano Di},
      title = {An Empirical Study on the Fault-Inducing Effect of Functional Constructs
                        in Python},
      booktitle = {IEEE International Conference on Software Maintenance and Evolution,
                        {ICSME} 2022, Limassol, Cyprus, October 3-7, 2022},
      pages = {47--58},
      publisher = {IEEE},
      year = {2022},
      url = {https://doi.org/10.1109/ICSME55016.2022.00013},
      doi = {10.1109/ICSME55016.2022.00013},
      timestamp = {Wed, 11 Jan 2023 16:58:31 +0100},
      biburl = {https://dblp.org/rec/conf/icsm/ZampettiBZAP22.bib},
      bibsource = {dblp computer science bibliography, https://dblp.org}
    }
    
      
  7. [7]B. A. Muse, F. Khomh, and G. Antoniol, “Do Developers Refactor Data Access Code? An Empirical Study,” in IEEE International Conference on Software Analysis, Evolution and Reengineering, SANER 2022, Honolulu, HI, USA, March 15-18, 2022, pp. 25–35, doi: 10.1109/SANER53432.2022.00014.
    Bibtex
      @inproceedings{DBLP:conf/wcre/MuseKA22,
      author = {Muse, Biruk Asmare and Khomh, Foutse and Antoniol, Giuliano},
      title = {Do Developers Refactor Data Access Code? An Empirical Study},
      booktitle = {IEEE International Conference on Software Analysis, Evolution and
                        Reengineering, {SANER} 2022, Honolulu, HI, USA, March 15-18, 2022},
      pages = {25--35},
      publisher = {IEEE},
      year = {2022},
      url = {https://doi.org/10.1109/SANER53432.2022.00014},
      doi = {10.1109/SANER53432.2022.00014},
      timestamp = {Tue, 26 Jul 2022 14:23:18 +0200},
      biburl = {https://dblp.org/rec/conf/wcre/MuseKA22.bib},
      bibsource = {dblp computer science bibliography, https://dblp.org}
    }
    
      

2021

  1. [1]D. Humeniuk, G. Antoniol, and F. Khomh, “Data Driven Testing of Cyber Physical Systems,” in 14th IEEE/ACM International Workshop on Search-Based Software Testing, SBST 2021, Madrid, Spain, May 31, 2021, pp. 16–19, doi: 10.1109/SBST52555.2021.00010.
    Bibtex
      @inproceedings{DBLP:conf/sbst/HumeniukAK21,
      author = {Humeniuk, Dmytro and Antoniol, Giuliano and Khomh, Foutse},
      title = {Data Driven Testing of Cyber Physical Systems},
      booktitle = {14th {IEEE/ACM} International Workshop on Search-Based Software Testing,
                     {SBST} 2021, Madrid, Spain, May 31, 2021},
      pages = {16--19},
      publisher = {IEEE},
      year = {2021},
      url = {https://doi.org/10.1109/SBST52555.2021.00010},
      doi = {10.1109/SBST52555.2021.00010},
      timestamp = {Tue, 03 Aug 2021 15:18:49 +0200},
      biburl = {https://dblp.org/rec/conf/sbst/HumeniukAK21.bib},
      bibsource = {dblp computer science bibliography, https://dblp.org}
    }
    
      
  2. [2]M. Vahedi, M. M. Rahman, F. Khomh, G. Uddin, and G. Antoniol, “Summarizing Relevant Parts from Technical Videos,” in 28th IEEE International Conference on Software Analysis, Evolution and Reengineering, SANER 2021, Honolulu, HI, USA, March 9-12, 2021, pp. 434–445, doi: 10.1109/SANER50967.2021.00047.
    Bibtex
      @inproceedings{DBLP:conf/wcre/Vahedi0K0A21,
      author = {Vahedi, Mahmood and Rahman, Mohammad Masudur and Khomh, Foutse and Uddin, Gias and Antoniol, Giuliano},
      title = {Summarizing Relevant Parts from Technical Videos},
      booktitle = {28th {IEEE} International Conference on Software Analysis, Evolution
                     and Reengineering, {SANER} 2021, Honolulu, HI, USA, March 9-12, 2021},
      pages = {434--445},
      publisher = {IEEE},
      year = {2021},
      url = {https://doi.org/10.1109/SANER50967.2021.00047},
      doi = {10.1109/SANER50967.2021.00047},
      timestamp = {Tue, 18 May 2021 13:25:35 +0200},
      biburl = {https://dblp.org/rec/conf/wcre/Vahedi0K0A21.bib},
      bibsource = {dblp computer science bibliography, https://dblp.org}
    }
    
      
  3. [3]A. Hamidi, G. Antoniol, F. Khomh, M. D. Penta, and M. Hamidi, “Towards Understanding Developers’ Machine-Learning Challenges: A Multi-Language Study on Stack Overflow,” in 21st IEEE International Working Conference on Source Code Analysis and Manipulation, SCAM 2021, Luxembourg, September 27-28, 2021, pp. 58–69, doi: 10.1109/SCAM52516.2021.00016.
    Bibtex
      @inproceedings{DBLP:conf/scam/HamidiAKPH21,
      author = {Hamidi, Alaleh and Antoniol, Giuliano and Khomh, Foutse and Penta, Massimiliano Di and Hamidi, Mohammad},
      title = {Towards Understanding Developers' Machine-Learning Challenges: {A}
                     Multi-Language Study on Stack Overflow},
      booktitle = {21st IEEE International Working Conference on Source Code Analysis
                     and Manipulation, {SCAM} 2021, Luxembourg, September 27-28, 2021},
      pages = {58--69},
      publisher = {IEEE},
      year = {2021},
      url = {https://doi.org/10.1109/SCAM52516.2021.00016},
      doi = {10.1109/SCAM52516.2021.00016},
      timestamp = {Wed, 01 Dec 2021 17:46:15 +0100},
      biburl = {https://dblp.org/rec/conf/scam/HamidiAKPH21.bib},
      bibsource = {dblp computer science bibliography, https://dblp.org}
    }
    
      
  4. [4]D. Humeniuk, G. Antoniol, and F. Khomh, “SWAT tool at the SBST 2021 Tool Competition,” in 14th IEEE/ACM International Workshop on Search-Based Software Testing, SBST 2021, Madrid, Spain, May 31, 2021, pp. 42–43, doi: 10.1109/SBST52555.2021.00019.
    Bibtex
      @inproceedings{DBLP:conf/sbst/HumeniukAK21a,
      author = {Humeniuk, Dmytro and Antoniol, Giuliano and Khomh, Foutse},
      title = {SWAT tool at the {SBST} 2021 Tool Competition},
      booktitle = {14th {IEEE/ACM} International Workshop on Search-Based Software Testing,
                     {SBST} 2021, Madrid, Spain, May 31, 2021},
      pages = {42--43},
      publisher = {IEEE},
      year = {2021},
      url = {https://doi.org/10.1109/SBST52555.2021.00019},
      doi = {10.1109/SBST52555.2021.00019},
      timestamp = {Tue, 03 Aug 2021 15:18:49 +0200},
      biburl = {https://dblp.org/rec/conf/sbst/HumeniukAK21a.bib},
      bibsource = {dblp computer science bibliography, https://dblp.org}
    }
    
      

2020

  1. [1]L. Ardito, M. Torchiano, R. Coppola, and G. Antoniol, “PowTrAn: An R Package for power trace analysis,” SoftwareX, vol. 12, p. 100512, 2020, doi: 10.1016/j.softx.2020.100512.
    Bibtex
      @article{DBLP:journals/softx/ArditoTCA20,
      author = {Ardito, Luca and Torchiano, Marco and Coppola, Riccardo and Antoniol, Giulio},
      title = {PowTrAn: An {R} Package for power trace analysis},
      journal = {SoftwareX},
      volume = {12},
      pages = {100512},
      year = {2020},
      url = {https://doi.org/10.1016/j.softx.2020.100512},
      doi = {10.1016/j.softx.2020.100512},
      timestamp = {Mon, 03 Jan 2022 22:05:31 +0100},
      biburl = {https://dblp.org/rec/journals/softx/ArditoTCA20.bib},
      bibsource = {dblp computer science bibliography, https://dblp.org}
    }
    
      
  2. [2]B. A. Muse, M. M. Rahman, C. Nagy, A. Cleve, F. Khomh, and G. Antoniol, “On the Prevalence, Impact, and Evolution of SQL Code Smells in Data-Intensive Systems,” in MSR ’20: 17th International Conference on Mining Software Repositories, Seoul, Republic of Korea, 29-30 June, 2020, pp. 327–338, doi: 10.1145/3379597.3387467.
    Bibtex
      @inproceedings{DBLP:conf/msr/MuseR0CKA20,
      author = {Muse, Biruk Asmare and Rahman, Mohammad Masudur and Nagy, Csaba and Cleve, Anthony and Khomh, Foutse and Antoniol, Giuliano},
      editor = {Kim, Sunghun and Gousios, Georgios and Nadi, Sarah and Hejderup, Joseph},
      title = {On the Prevalence, Impact, and Evolution of {SQL} Code Smells in Data-Intensive
                     Systems},
      booktitle = {MSR '20: 17th International Conference on Mining Software Repositories,
                     Seoul, Republic of Korea, 29-30 June, 2020},
      pages = {327--338},
      publisher = {ACM},
      year = {2020},
      url = {https://doi.org/10.1145/3379597.3387467},
      doi = {10.1145/3379597.3387467},
      timestamp = {Sun, 25 Jul 2021 11:49:53 +0200},
      biburl = {https://dblp.org/rec/conf/msr/MuseR0CKA20.bib},
      bibsource = {dblp computer science bibliography, https://dblp.org}
    }
    
      
  3. [3]R. Morales, F. Khomh, and G. Antoniol, “RePOR: Mimicking Humans on Refactoring Tasks. Are We There Yet?,” Journal of Empirical Software Engineering (EMSE), 2020.
    Bibtex
      @article{DBLP:journals/emse/Morales2020,
      author = {Morales, Rodrigo and Khomh, Foutse and Antoniol, Giuliano},
      title = {RePOR: Mimicking Humans on Refactoring Tasks. Are We There Yet?},
      journal = {Journal of Empirical Software Engineering (EMSE)},
      year = {2020}
    }
    
      
  4. [4]B. A. Muse, M. M. Rahman, C. Nagy, A. Cleve, F. Khomh, and G. Antoniol, “On the Prevalence, Impact, and Evolution of SQL code smells in Data-Intensive Systems,” in Proceedings of the 17th International Conference on Mining Software Repositories (MSR 2020), United States, 2020.
    Bibtex
      @inproceedings{05f6318274934e559bc515cfb19d5be8,
      title = {On the Prevalence, Impact, and Evolution of SQL code smells in Data-Intensive Systems},
      author = {Muse, Biruk Asmare and Rahman, Mohammad Masudur and Nagy, Csaba and Cleve, Anthony and Khomh, Foutse and Antoniol, Giuliano},
      year = {2020},
      language = {English},
      booktitle = {[Provisional] Proceedings of the 17th International Conference on Mining Software Repositories (MSR 2020)},
      publisher = {ACM Press},
      address = {United States}
    }
    
      
  5. [5]C. Coviello, S. Romano, G. Scanniello, A. Marchetto, A. Corazza, and G. Antoniol, “Adequate vs. inadequate test suite reduction approaches,” Information & Software Technology, vol. 119, 2020, doi: 10.1016/j.infsof.2019.106224.
    Bibtex
      @article{DBLP:journals/infsof/CovielloRSMCA20,
      author = {Coviello, Carmen and Romano, Simone and Scanniello, Giuseppe and Marchetto, Alessandro and Corazza, Anna and Antoniol, Giuliano},
      title = {Adequate vs. inadequate test suite reduction approaches},
      journal = {Information {\&} Software Technology},
      volume = {119},
      year = {2020},
      url = {https://doi.org/10.1016/j.infsof.2019.106224},
      doi = {10.1016/j.infsof.2019.106224},
      timestamp = {Thu, 06 Feb 2020 18:12:34 +0100},
      biburl = {https://dblp.org/rec/bib/journals/infsof/CovielloRSMCA20},
      bibsource = {dblp computer science bibliography, https://dblp.org}
    }
    
      
  6. [6]G. Antoniol and F. Sarro, “Guest editorial: Special section on Search-based Software Engineering track at GECCO 2018,” Information & Software Technology, vol. 118, 2020, doi: 10.1016/j.infsof.2019.106223.
    Bibtex
      @article{DBLP:journals/infsof/AntoniolS20,
      author = {Antoniol, Giuliano and Sarro, Federica},
      title = {Guest editorial: Special section on Search-based Software Engineering
                     track at {GECCO} 2018},
      journal = {Information {\&} Software Technology},
      volume = {118},
      year = {2020},
      url = {https://doi.org/10.1016/j.infsof.2019.106223},
      doi = {10.1016/j.infsof.2019.106223},
      timestamp = {Tue, 17 Dec 2019 10:02:41 +0100},
      biburl = {https://dblp.org/rec/bib/journals/infsof/AntoniolS20},
      bibsource = {dblp computer science bibliography, https://dblp.org}
    }
    
      
  7. [7]Y. Hashemi, M. Nayebi, and G. Antoniol, “Documentation of Machine Learning Software,” in SANER, 2020, to appear.
    Bibtex
      @inproceedings{Yalda2020,
      author = {Hashemi, Yalda and Nayebi, Maleknaz and Antoniol, Giuliano},
      title = {Documentation of Machine Learning Software},
      booktitle = {SANER},
      year = {2020},
      pages = {To Appear}
    }
    
      

2019

  1. [1]D. Johannes, F. Khomh, and G. Antoniol, “A large-scale empirical study of code smells in JavaScript projects,” Software Quality Journal, vol. 27, no. 3, pp. 1271–1314, 2019, doi: 10.1007/s11219-019-09442-9.
    Bibtex
      @article{DBLP:journals/sqj/JohannesKA19,
      author = {Johannes, David and Khomh, Foutse and Antoniol, Giuliano},
      title = {A large-scale empirical study of code smells in JavaScript projects},
      journal = {Software Quality Journal},
      volume = {27},
      number = {3},
      pages = {1271--1314},
      year = {2019},
      url = {https://doi.org/10.1007/s11219-019-09442-9},
      doi = {10.1007/s11219-019-09442-9},
      timestamp = {Sat, 12 Oct 2019 12:51:21 +0200},
      biburl = {https://dblp.org/rec/bib/journals/sqj/JohannesKA19},
      bibsource = {dblp computer science bibliography, https://dblp.org}
    }
    
      

2018

  1. [1]R. Saborido, R. Morales, F. Khomh, Y.-G. Gueheneuc, and G. Antoniol, “Getting the most from map data structures in Android,” Empirical Software Engineering, Mar. 2018, doi: 10.1007/s10664-018-9607-8.
    Bibtex
      @article{Saborido2018,
      author = {Saborido, Ruben and Morales, Rodrigo and Khomh, Foutse and Gueheneuc, Yann-Gael and Antoniol, Giuliano},
      title = {Getting the most from map data structures in Android},
      journal = {Empirical Software Engineering},
      year = {2018},
      month = mar,
      issn = {1573-7616},
      doi = {10.1007/s10664-018-9607-8},
      url = {https://doi.org/10.1007/s10664-018-9607-8}
    }
    
      
    Abstract A map is a data structure that is commonly used to store data as key–value pairs and retrieve data as keys, values, or key–value pairs. Although Java offers different map implementation classes, Android SDK offers other implementations supposed to be more efficient than HashMap: ArrayMap and SparseArray variants (SparseArray, LongSparseArray, SparseIntArray, SparseLongArray, and SparseBooleanArray). Yet, the performance of these implementations in terms of CPU time, memory usage, and energy consumption is lacking in the official Android documentation; although saving CPU, memory, and energy is a major concern of users wanting to increase battery life. Consequently, we study the use of map implementations by Android developers in two ways. First, we perform an observational study of 5713 Android apps in GitHub. Second, we conduct a survey to assess developers’ perspective on Java and Android map implementations. Then, we perform an experimental study comparing HashMap, ArrayMap, and SparseArray variants map implementations in terms of CPU time, memory usage, and energy consumption. We conclude with guidelines for choosing among the map implementations: HashMap is preferable over ArrayMap to improve energy efficiency of apps, and SparseArray variants should be used instead of HashMap and ArrayMap when keys are primitive types.
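    A minimal Java sketch of the guideline above (a hypothetical illustration, not code or measurements from the paper; it assumes the Android SDK on the compile classpath, since android.util.SparseArray is Android-specific): with primitive int keys, HashMap boxes every key into an Integer object, whereas SparseArray keeps the keys in a plain int array.

      // Hypothetical illustration of the guideline; assumes the Android SDK
      // (android.util.SparseArray). Not code taken from the paper.
      import android.util.SparseArray;

      import java.util.HashMap;
      import java.util.Map;

      class MapChoiceSketch {
          // int keys with HashMap: every put/get autoboxes the key to an Integer.
          static Map<Integer, String> withHashMap() {
              Map<Integer, String> m = new HashMap<>();
              m.put(42, "answer"); // 42 is boxed via Integer.valueOf(42)
              return m;
          }

          // int keys with SparseArray: keys stay primitive; no Map.Entry objects are allocated.
          static SparseArray<String> withSparseArray() {
              SparseArray<String> m = new SparseArray<>();
              m.put(42, "answer"); // key stored in an int[], value in an Object[]
              return m;
          }
      }
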
  2. [2]S. Romano, G. Scanniello, G. Antoniol, and A. Marchetto, “SPIRITuS: a SimPle Information Retrieval regressIon Test Selection approach,” International Journal on Information and Software Technology, 2018, to appear.
    Bibtex
      @article{Spiritus2018,
      author = {Romano, Simone and Scanniello, Giuseppe and Antoniol, Giulio and Marchetto, Alessandro},
      title = {SPIRITuS: a SimPle Information Retrieval regressIon Test Selection approach},
      journal = {International Journal on Information and Software Technology},
      volume = {},
      pages = {To-Appear},
      year = {2018},
      url = {},
      doi = {}
    }
    
      
  3. [3]S. Fakhoury, V. Arnaoudova, C. Noiseux, F. Khomh, and G. Antoniol, “Keep it simple: is deep learning good for linguistic smell detection?,” in Proceedings of the International Conference on Software Analysis, Evolution, and Reengineering (SANER), RENE Track, Feb. 2018.
    Bibtex
      @inproceedings{Fakhoury:saner:CNN,
      title = {Keep it simple: is deep learning good for linguistic smell detection?},
      author = {Fakhoury, Sarah and Arnaoudova, Venera and Noiseux, Cedric and Khomh, Foutse and Antoniol, Giuliano},
      year = {2018},
      date = {2018-02-22},
      booktitle = {Proceedings of the International Conference on Software Analysis, Evolution, and Reengineering (SANER)—REproducibility Studies and NEgative Results (RENE) Track},
      keywords = {deep learning, empirical study, linguistic antipatterns, machine learning, source code identifiers, source code readability}
    }
    
      
  4. [4]C. Coviello, S. Romano, G. Scanniello, A. Marchetto, G. Antoniol, and A. Corazza, “Clustering Support for Inadequate Test Suite Reduction,” in Proceedings of the International Conference on Software Analysis, Evolution, and Reengineering (SANER), Feb. 2018.
    Bibtex
      @inproceedings{Coviello2018,
      title = {Clustering Support for Inadequate Test Suite Reduction},
      author = {Coviello, Carmen and Romano, Simone and Scanniello, Giuseppe and Marchetto, Alessandro and Antoniol, Giuliano and Corazza, Anna},
      year = {2018},
      date = {2018-02-22},
      booktitle = {Proceedings of the International Conference on Software Analysis, Evolution, and Reengineering (SANER)}
    }
    
      
  5. [5]R. Morales, F. Chicano, F. Khomh, and G. Antoniol, “Exact search-space size for the refactoring scheduling problem,” Autom. Softw. Eng., vol. 25, no. 2, pp. 195–200, 2018, doi: 10.1007/s10515-017-0213-6.
    Bibtex
      @article{DBLP:journals/ase/MoralesCKA18,
      author = {Morales, Rodrigo and Chicano, Francisco and Khomh, Foutse and Antoniol, Giuliano},
      title = {Exact search-space size for the refactoring scheduling problem},
      journal = {Autom. Softw. Eng.},
      volume = {25},
      number = {2},
      pages = {195--200},
      year = {2018},
      url = {https://doi.org/10.1007/s10515-017-0213-6},
      doi = {10.1007/s10515-017-0213-6},
      timestamp = {Tue, 15 May 2018 12:18:05 +0200},
      biburl = {https://dblp.org/rec/bib/journals/ase/MoralesCKA18},
      bibsource = {dblp computer science bibliography, https://dblp.org}
    }
    
      
  6. [6]R. Morales, F. Chicano, F. Khomh, and G. Antoniol, “Efficient refactoring scheduling based on partial order reduction,” Journal of Systems and Software, vol. 145, pp. 25–51, 2018, doi: 10.1016/j.jss.2018.07.076.
    Bibtex
      @article{DBLP:journals/jss/MoralesCKA18,
      author = {Morales, Rodrigo and Chicano, Francisco and Khomh, Foutse and Antoniol, Giuliano},
      title = {Efficient refactoring scheduling based on partial order reduction},
      journal = {Journal of Systems and Software},
      volume = {145},
      pages = {25--51},
      year = {2018},
      url = {https://doi.org/10.1016/j.jss.2018.07.076},
      doi = {10.1016/j.jss.2018.07.076},
      timestamp = {Fri, 02 Nov 2018 15:38:55 +0100},
      biburl = {https://dblp.org/rec/bib/journals/jss/MoralesCKA18},
      bibsource = {dblp computer science bibliography, https://dblp.org}
    }
    
      
  7. [7]Z. Soh, F. Khomh, Y.-G. Guéhéneuc, and G. Antoniol, “Noise in Mylyn interaction traces and its impact on developers and recommendation systems,” Empirical Software Engineering Journal, vol. 23, pp. 645–692, 2018.
    Bibtex
      @article{Zephiryn1017,
      author = {Soh, Z{\'{e}}phyrin and Khomh, Foutse and Gu{\'{e}}h{\'{e}}neuc, Yann{-}Ga{\"{e}}l and Antoniol, Giuliano},
      title = {Noise in Mylyn interaction traces and its impact on
      developers and recommendation systems},
      journal = {Empirical Software Engineering Journal},
      volume = {23},
      pages = {645-692},
      year = {2018},
      url = {},
      doi = {}
    }
    
      

2017

  1. [1]R. Morales, R. Saborido, F. Khomh, F. Chicano, and G. Antoniol, “EARMO: An Energy-Aware Refactoring Approach for Mobile Apps,” IEEE Transactions on Software Engineering, pp. 1–1, Jan. 2017, doi: 10.1109/TSE.2017.2757486.
    Bibtex
      @article{Morales2018,
      title = {EARMO: An Energy-Aware Refactoring Approach for Mobile Apps},
      journal = {IEEE Transactions on Software Engineering},
      year = {2017},
      month = jan,
      pages = {1 - 1},
      issn = {0098-5589},
      doi = {10.1109/TSE.2017.2757486},
      url = {http://ieeexplore.ieee.org/document/8052533/},
      author = {Morales, Rodrigo and Saborido, Ruben and Khomh, Foutse and Chicano, Francisco and Antoniol, Giuliano}
    }
    
      
  2. [2]F. Zampetti, C. Noiseux, G. Antoniol, F. Khomh, and M. D. Penta, “Recommending when Design Technical Debt Should be Self-Admitted,” in ICSME: The International Conference on Software Maintenance and Evolution, 2017, pp. 216–226.
    Bibtex
      @inproceedings{Cedric17,
      author = {Zampetti, Fiorella and Noiseux, Cédric and Antoniol, Giuliano and Khomh, Foutse and Penta, Massimiliano Di},
      title = {Recommending when Design Technical Debt Should be Self-Admitted},
      booktitle = {ICSME: The International Conference on Software Maintenance and Evolution},
      pages = {216-226},
      year = {2017}
    }
    
      
  3. [3]P. Galinier, S. Kpodjedo, and G. Antoniol, “A penalty-based Tabu search for constrained covering arrays,” in GECCO: The Genetic and Evolutionary Computation Conference, 2017, pp. 1288–1294.
    Bibtex
      @inproceedings{SeglaKPG17,
      author = {Galinier, Philippe and Kpodjedo, Segla and Antoniol, Giuliano},
      title = {A penalty-based Tabu search for constrained covering arrays},
      booktitle = {GECCO: The Genetic and Evolutionary Computation Conference},
      pages = {1288-1294},
      year = {2017}
    }
    
      
  4. [4]R. Morales, Z. Soh, F. Khomh, G. Antoniol, and F. Chicano, “On the use of developers’ context for automatic refactoring of software anti-patterns,” Journal of Systems and Software, vol. 128, pp. 236–251, 2017, doi: 10.1016/j.jss.2016.05.042.
    Bibtex
      @article{MoralesSKAC17,
      author = {Morales, Rodrigo and Soh, Z{\'{e}}phyrin and Khomh, Foutse and Antoniol, Giuliano and Chicano, Francisco},
      title = {On the use of developers' context for automatic refactoring of software
                     anti-patterns},
      journal = {Journal of Systems and Software},
      volume = {128},
      pages = {236--251},
      year = {2017},
      url = {https://doi.org/10.1016/j.jss.2016.05.042},
      doi = {10.1016/j.jss.2016.05.042}
    }
    
      
  5. [5]R. Saborido, F. Khomh, G. Antoniol, and Y.-G. Guéhéneuc, “Comprehension of ads-supported and paid Android applications: are they different?,” in Proceedings of the 25th International Conference on Program Comprehension, ICPC 2017, Buenos Aires, Argentina, May 22-23, 2017, pp. 143–153.
    Bibtex
      @inproceedings{SaboridoKAG17,
      author = {Saborido, Rub{\'{e}}n and Khomh, Foutse and Antoniol, Giuliano and Gu{\'{e}}h{\'{e}}neuc, Yann{-}Ga{\"{e}}l},
      title = {Comprehension of ads-supported and paid Android applications: are
                     they different?},
      booktitle = {Proceedings of the 25th International Conference on Program Comprehension,
                     {ICPC} 2017, Buenos Aires, Argentina, May 22-23, 2017},
      pages = {143--153},
      year = {2017}
    }
    
      
  6. [6]M. Moussa, M. Di Penta, G. Antoniol, and G. Beltrame, “ACCUSE: Helping Users to Minimize Android App Privacy Concerns,” in 4th IEEE/ACM International Conference on Mobile Software Engineering and Systems, 2017, pp. 144–148.
    Bibtex
      @inproceedings{MoussaACCUSE2017,
      title = {ACCUSE: Helping Users to Minimize Android App Privacy Concerns},
      author = {Moussa, Majda and Di Penta, Massimiliano and Antoniol, Giuliano and Beltrame, Giovanni},
      booktitle = {4th IEEE/ACM International Conference on Mobile Software Engineering and Systems},
      year = {2017},
      organization = {IEEE, ACM},
      keyword = {energy optimization},
      pages = {144-148}
    }
    
      
  7. [7]L. An, O. Mlouki, F. Khomh, and G. Antoniol, “Stack Overflow: A code laundering platform?,” in IEEE 24th International Conference on Software Analysis, Evolution and Reengineering, SANER 2017, Klagenfurt, Austria, February 20-24, 2017, pp. 283–293, doi: 10.1109/SANER.2017.7884629.
    Bibtex
      @inproceedings{AnMKA17,
      author = {An, Le and Mlouki, Ons and Khomh, Foutse and Antoniol, Giuliano},
      title = {Stack Overflow: {A} code laundering platform?},
      booktitle = {IEEE 24th International Conference on Software Analysis, Evolution
                     and Reengineering, {SANER} 2017, Klagenfurt, Austria, February 20-24,
                     2017},
      pages = {283--293},
      year = {2017},
      doi = {10.1109/SANER.2017.7884629}
    }
    
      
  8. [8]A. Saboury, P. Musavi, F. Khomh, and G. Antoniol, “An empirical study of code smells in JavaScript projects,” in IEEE 24th International Conference on Software Analysis, Evolution and Reengineering, SANER 2017, Klagenfurt, Austria, February 20-24, 2017, pp. 294–305, doi: 10.1109/SANER.2017.7884630.
    Bibtex
      @inproceedings{SabouryMKA17,
      author = {Saboury, Amir and Musavi, Pooya and Khomh, Foutse and Antoniol, Giulio},
      title = {An empirical study of code smells in JavaScript projects},
      booktitle = {IEEE 24th International Conference on Software Analysis, Evolution
                     and Reengineering, {SANER} 2017, Klagenfurt, Austria, February 20-24,
                     2017},
      pages = {294--305},
      doi = {10.1109/SANER.2017.7884630},
      year = {2017}
    }
    
      

2016

  1. [1]W. Wu, F. Khomh, B. Adams, Y. G. Guéhéneuc, and G. Antoniol, “An exploratory study of api changes and usages based on apache and eclipse ecosystems,” Empirical Software Engineering, vol. 21, no. 6, pp. 2366–2412, 2016, doi: 10.1007/s10664-015-9411-7.
    Bibtex
      @article{Wei2015emse,
      author = {Wu, Wei and Khomh, Foutse and Adams, Bram and Gu{\'{e}}h{\'{e}}neuc, Yann Ga{\"{e}}l and Antoniol, Giuliano},
      title = {An exploratory study of api changes and usages based on apache and eclipse ecosystems},
      journal = {Empirical Software Engineering},
      volume = {21},
      number = {6},
      pages = {2366--2412},
      year = {2016},
      url = {http://dx.doi.org/10.1007/s10664-015-9411-7},
      doi = {10.1007/s10664-015-9411-7}
    }
    
      
    Abstract Frameworks are widely used in modern software development to reduce development costs. They are accessed through their Application Programming Interfaces (APIs), which specify the contracts with client programs. When frameworks evolve, API backward-compatibility cannot always be guaranteed and client programs must upgrade to use the new releases. Because framework upgrades are not cost-free, observing API changes and usages together at fine-grained levels is necessary to help developers understand, assess, and forecast the cost of each framework upgrade. Whereas previous work studied API changes in frameworks and API usages in client programs separately, we analyse and classify API changes and usages together in 22 framework releases from the Apache and Eclipse ecosystems and their client programs. We find that (1) missing classes and methods happen more often in frameworks and affect client programs more often than the other API change types do, (2) missing interfaces occur rarely in frameworks but affect client programs often, (3) framework APIs are used on average in 35% of client classes and interfaces, (4) most of such usages could be encapsulated locally and reduced in number, and (5) about 11% of APIs usages could cause ripple effects in client programs when these APIs change. Based on these findings, we provide suggestions for developers and researchers to reduce the impact of API evolution through language mechanisms and design strategies.
  2. [2]A. Sabane, Y. G. Guéhéneuc, V. Arnaoudova, and G. Antoniol, “Fragile Base-class Problem, Problem?,” Empirical Software Engineering, pp. 1–46, 2016, doi: 10.1007/s10664-016-9448-2.
    Bibtex
      @article{Aminta2016ense,
      author = {Sabane, Aminata and Gu{\'{e}}h{\'{e}}neuc, Yann Ga{\"{e}}l and Arnaoudova, Venera and Antoniol, Giuliano},
      title = {Fragile Base-class Problem, Problem?},
      journal = {Empirical Software Engineering},
      pages = {1--46},
      year = {2016},
      doi = {10.1007/s10664-016-9448-2},
      ee = {http://dx.doi.org/10.1007/s10664-016-9448-2}
    }
    
      
    Abstract The fragile base-class problem (FBCP) has been described in the literature as a consequence of “misusing”inheritance and composition in object-oriented programming when (re)using frameworks. Many research works have focused on preventing the FBCP by proposing alternative mechanisms for reuse, but, to the best of our knowledge, there is no previous research work studying the prevalence and impact of the FBCP in real-world software systems. The goal of our work is thus twofold: (1) assess, in different systems, the prevalence of micro-architectures, called FBCS, that could lead to two aspects of the FBCP, (2) investigate the relation between the detected occurrences and the quality of the systems in terms of change and fault proneness, and (3) assess whether there exist bugs in these systems that are related to the FBCP. We therefore perform a quantitative and a qualitative study. Quantitatively, we analyse multiple versions of seven different open-source systems that use 58 different frameworks, resulting in 301 configurations. We detect in these systems 112,263 FBCS occurrences and we analyse whether classes playing the role of sub-classes in FBCS occurrences are more change and–or fault prone than other classes. Results show that classes participating in the analysed FBCS are neither more likely to change nor more likely to have faults. Qualitatively, we conduct a survey to confirm/infirm that some bugs are related to the FBCP. The survey involves 41 participants that analyse a total of 104 bugs of three open-source systems. Results indicate that none of the analysed bugs is related to the FBCP. Thus, despite large, rigorous quantitative and qualitative studies, we must conclude that the two aspects of the FBCP that we analyse may not be as problematic in terms of change and fault-proneness as previously thought in the literature. We propose reasons why the FBCP may not be so prevalent in the analysed systems and in other systems in general.
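    A classic textbook Java illustration of the fragile base-class problem (a generic sketch, not one of the FBCS micro-architectures mined in the paper): the subclass silently depends on whether the inherited addAll is implemented on top of add.

      // Generic illustration of the fragile base-class problem, not an example from
      // the study: HashSet.addAll delegates to add, so addCount is updated twice.
      import java.util.Arrays;
      import java.util.Collection;
      import java.util.HashSet;

      class CountingSet<E> extends HashSet<E> {
          private int addCount = 0;

          @Override
          public boolean add(E e) {
              addCount++;
              return super.add(e);
          }

          @Override
          public boolean addAll(Collection<? extends E> c) {
              addCount += c.size();   // assumes addAll is independent of add...
              return super.addAll(c); // ...but the inherited addAll calls add for each element
          }

          int getAddCount() {
              return addCount;
          }

          public static void main(String[] args) {
              CountingSet<String> s = new CountingSet<>();
              s.addAll(Arrays.asList("a", "b", "c"));
              System.out.println(s.getAddCount()); // prints 6, not 3
          }
      }
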
  3. [3]R. Morales, Z. Soh, F. Khomh, G. Antoniol, and F. Chicano, “On the use of developers’ context for automatic refactoring of software anti-patterns,” Journal of Systems and Software, 2016, doi: 10.1016/j.jss.2016.05.042.
    Bibtex
      @article{Morales2016,
      title = {On the use of developers’ context for automatic refactoring of software anti-patterns },
      journal = {Journal of Systems and Software },
      volume = {},
      number = {},
      pages = { - },
      year = {2016},
      note = {},
      issn = {0164-1212},
      doi = {10.1016/j.jss.2016.05.042},
      ee = {http://www.sciencedirect.com/science/article/pii/S0164121216300632},
      author = {Morales, Rodrigo and Soh, Zéphyrin and Khomh, Foutse and Antoniol, Giuliano and Chicano, Francisco}
    }
    
      
    Abstract Anti-patterns are poor solutions to design problems that make software systems hard to understand and extend. Entities involved in anti-patterns are reported to be consistently related to high change and fault rates. Refactorings, which are behavior preserving changes are often performed to remove anti-patterns from software systems. Developers are advised to interleave refactoring activities with their regular coding tasks to remove anti-patterns, and consequently improve software design quality. However, because the number of anti-patterns in a software system can be very large, and their interactions can require a solution in a set of conflicting objectives, the process of manual refactoring can be overwhelming. To automate this process, previous works have modeled anti-patterns refactoring as a batch process where a program provides a solution for the total number of classes in a system, and the developer has to examine a long list of refactorings, which is not feasible in most situations. Moreover, these proposed solutions often require that developers modify classes on which they never worked before (i.e., classes on which they have little or no knowledge). To improve on these limitations, this paper proposes an automated refactoring approach, ReCon (Refactoring approach based on task Context), that leverages information about a developer’s task (i.e., the list of code entities relevant to the developer’s task) and metaheuristics techniques to compute the best sequence of refactorings that affects only entities in the developer’s context. We mine 1705 task contexts (collected using the Eclipse plug-in Mylyn) and 1013 code snapshots from three open-source software projects (Mylyn, PDE, Eclipse Platform) to assess the performance of our proposed approach. Results show that ReCon can remove more than 50% of anti-patterns in a software system, using fewer resources than the traditional approaches from the literature.
  4. [4]L. Guerrouj et al., “An Empirical Study on the Impact of Lexical Smells on Change- and Fault-Proneness,” Software Quality Journal, 2016.
    Bibtex
      @article{latifa2016,
      author = {Guerrouj, Latifa and Kermansaravi, Zeinab and Arnaoudova, Venera and Fung, Benjamin and Khomh, Foutse and Antoniol, Giuliano and Gueheneuc, Yann-Gael},
      title = {An Empirical Study on the Impact of Lexical Smells on Change- and Fault-Proneness},
      journal = {Software Quality Journal},
      year = {2016},
      ee = {http://link.springer.com/article/10.1007/s11219-016-9318-6}
    }
    
      
  5. [5]R. Morales, A. Sabane, P. Musavi, F. Khomh, F. Chicano, and G. Antoniol, “Finding the Best Compromise Between Design Quality and Testing Effort During Refactoring,” in SANER, 2016, pp. 24–35, doi: 10.1109/SANER.2016.23.
    Bibtex
      @inproceedings{rodrigo2016saner,
      author = {Morales, Rodrigo and Sabane, Aminata and Musavi, Pooya and Khomh, Foutse and Chicano, Francisco and Antoniol, Giulio},
      title = {Finding the Best Compromise Between Design Quality and Testing Effort During Refactoring},
      booktitle = {SANER},
      pages = {24--35},
      year = {2016},
      crossref = {DBLP:conf/wcre/2016},
      url = {http://dx.doi.org/10.1109/SANER.2016.23},
      doi = {10.1109/SANER.2016.23}
    }
    
      
    Abstract Anti-patterns are poor design choices that hinder code evolution, and understandability. Practitioners perform refactoring, that are semantic-preserving-code transformations, to correct anti-patterns and to improve design quality. However, manual refactoring is a consuming task and a heavy burden for developers who have to struggle to complete their coding tasks and maintain the design quality of the system at the same time. For that reason, researchers and practitioners have proposed several approaches to bring automated support to developers, with solutions that ranges from single anti-patterns correction, to multiobjective solutions. The latter attempt to reduce refactoring effort, or to improve semantic similarity between classes and methods in addition to remove anti-patterns. To the best of our knowledge none of the previous approaches have considered the impact of refactoring on another important aspect of software development, which is the testing effort. In this paper we propose a novel search-based multiobjective approach for removing five well-know anti-patterns and minimizing testing effort. To assess the effectiveness of our proposed approach, we implement three different multiobjective metaheuristics (NSGA-II, SPEA2, MOCell) and apply them to a benchmark comprised of four open-source systems. Results show that MOCell is the metaheuristic that provides the best performance.
  6. [6]O. Mlouki, F. Khomh, and G. Antoniol, “On the Detection of Licenses Violations in Android Ecosystem,” in SANER, 2016, pp. 382–392, doi: 10.1109/SANER.2016.73.
    Bibtex
      @inproceedings{ons2016saner,
      author = {Mlouki, Ons and Khomh, Foutse and Antoniol, Giulio},
      title = {On the Detection of Licenses Violations in Android Ecosystem},
      booktitle = {SANER},
      year = {2016},
      pages = {382-392},
      url = {http://dx.doi.org/10.1109/SANER.2016.73},
      doi = {10.1109/SANER.2016.73}
    }
    
      
    Abstract In mobile applications (apps), developers often reuse code from existing libraries and frameworks in order to reduce development costs. However, these libraries and frameworks are governed by licenses to which developers must comply. A failure to comply with a license is likely to result in penalties and fines. In this paper we define a three steps approach that helps to identify licenses used in a system and thus to detect licenses violations. We validate our approach in a set of apps from the F-droid market. We identify first the most common license used in mobile open source apps. Then we propose our model that identify licenses across different categories of mobile apps, some kinds of violation and licence changes in the process of software
  7. [7]R. Saborido-Infantes, G. Beltrame, F. Khomh, E. Alba, and G. Antoniol, “Optimizing User Experience in Choosing Android Applications,” in SANER, 2016, pp. 438–448, doi: 10.1109/SANER.2016.64.
    Bibtex
      @inproceedings{ruben2016saner,
      author = {Saborido-Infantes, Ruben and Beltrame, Giovanni and Khomh, Foutse and Alba, Enrique and Antoniol, Giulio},
      title = {Optimizing User Experience in Choosing Android Applications},
      booktitle = {SANER},
      year = {2016},
      pages = {438-448},
      url = {http://dx.doi.org/10.1109/SANER.2016.64},
      doi = {10.1109/SANER.2016.64}
    }
    
      
    Abstract Why is my cell phone battery already low? How did I use almost all the data of my monthly Internet plan? Is my recently released new application more efficient than similar competing applications? These are not easy questions to answer. Different applications implementing similar or identical functionalities may have different energy consumptions. In this paper, we present a recommendation system aimed at helping users and developers alike. We help users to choose optimal sets of applications belonging to different categories (eg. browsers, e-mails, cameras) while minimizing energy consumption, transmitted data, and maximizing application rating. We also help developers by showing the relative placement of their application’s efficiency with respect to selected others. When the optimal set of applications is computed, it is leveraged to position a given application with respect to the optimal, median and worst application in its category (eg. browsers). Out of eight categories we selected 144 applications, manually defined typical execution scenarios, collected the relevant data, and computed the Pareto optimal front solving a multi-objective optimization problem. We report evidence that, on the one hand, ratings do not correlate with energy efficiency and data frugality. On the other hand, we show that it is possible to help developers understanding how far is a new Android application power consumption and network usage with respect to optimal applications in the same category. From the user perspective, we show that choosing optimal sets of applications, power consumption and network usage can be reduced by 16.61% and 40.17%, respectively, in comparison to choosing the set of applications that maximizes only the rating.
  8. [8]W. Li, J. Hayes, G. Antoniol, Y. G. Guéhéneuc, and B. Adams, “Error leakage and wasted time: sensitivity and effort analysis of a requirements consistency checking process,” Journal of Software: Evolution and Process, vol. 28, no. 12, pp. 1061–1080, 2016, doi: 10.1002/smr.1819.
    Bibtex
      @article{Wenbin2016,
      author = {Li, Wenbin and Hayes, Jane and Antoniol, Giulio and Gu{\'{e}}h{\'{e}}neuc, Yann Ga{\"{e}}l and Adams, Bram},
      title = {Error leakage and wasted time: sensitivity and effort analysis of a requirements consistency checking process},
      journal = {Journal of Software: Evolution and Process},
      volume = {28},
      number = {12},
      issn = {2047-7481},
      ee = {http://dx.doi.org/10.1002/smr.1819},
      doi = {10.1002/smr.1819},
      pages = {1061--1080},
      year = {2016}
    }
    
      
    Abstract Several techniques are used by requirements engineering practitioners to address difficult problems such as specifying precise requirements while using inherently ambiguous natural language text and ensuring the consistency of requirements. Often, these problems are addressed by building processes/tools that combine multiple techniques where the output from 1 technique becomes the input to the next. While powerful, these techniques are not without problems. Inherent errors in each technique may leak into the subsequent step of the process. We model and study 1 such process, for checking the consistency of temporal requirements, and assess error leakage and wasted time. We perform an analysis of the input factors of our model to determine the effect that sources of uncertainty may have on the final accuracy of the consistency checking process. Convinced that error leakage exists and negatively impacts the results of the overall consistency checking process, we perform a second simulation to assess its impact on the analysts’ efforts to check requirements consistency. We show that analyst’s effort varies depending on the precision and recall of the subprocesses and that the number and capability of analysts affect their effort. We share insights gained and discuss applicability to other processes built of piped techniques.

2015

  1. [1]D. Martin, J. Cordy, B. Adams, and G. Antoniol, “Make It Simple - An Empirical Analysis of GNU Make Feature Use in Open Source Projects,” in ICPC, 2015, pp. 207–217.
    Bibtex
      @inproceedings{doug2015,
      author = {Martin, Douglas and Cordy, James and Adams, Bram and Antoniol, Giulio},
      title = {Make It Simple - An Empirical Analysis of GNU Make Feature Use in Open Source Projects},
      booktitle = {ICPC},
      year = {2015},
      pages = {207-217}
    }
    
      
    Abstract Make is one of the oldest build technologies and is still widely used today, whether by manually writing Makefiles, or by generating them using tools like Autotools and CMake. Despite its conceptual simplicity, modern Make implementations such as GNU Make have become very complex languages, featuring functions, macros, lazy variable assignments and (in GNU Make 4.0) the Guile embedded scripting language. Since we are interested in understanding how widespread such complex language features are, this paper studies the use of Make features in almost 20,000 Makefiles, comprised of over 8.4 million lines, from more than 350 different open source projects. We look at the popularity of features and the difference between hand-written Makefiles and those generated using various tools. We find that generated Makefiles use only a core set of features and that more advanced features (such as function calls) are used very little, and almost exclusively in hand-written Makefiles.
  2. [2]N. Ali, Z. Sharafi, Y. G. Guéhéneuc, and G. Antoniol, “An empirical study on the importance of source code entities for requirements traceability,” Empirical Software Engineering, vol. 20, no. 2, pp. 442–478, 2015.
    Bibtex
      @article{AliSGA15,
      author = {Ali, Nasir and Sharafi, Zohreh and Gu{\'{e}}h{\'{e}}neuc, Yann Ga{\"{e}}l and Antoniol, Giuliano},
      title = {An empirical study on the importance of source code entities for requirements
                     traceability},
      journal = {Empirical Software Engineering},
      volume = {20},
      number = {2},
      pages = {442--478},
      year = {2015}
    }
    
      
    Abstract Requirements Traceability (RT) links help developers during program comprehension and maintenance tasks. However, creating RT links is a laborious and resource-consuming task. Information Retrieval (IR) techniques are useful to automatically create traceability links. However, IR-based techniques typically have low accuracy (precision, recall, or both) and thus, creating RT links remains a human intensive process. We conjecture that understanding how developers verify RT links could help improve the accuracy of IR-based RT techniques to create RT links. Consequently, we perform an empirical study consisting of four case studies. First, we use an eye-tracking system to capture developers’ eye movements while they verify RT links. We analyse the obtained data to identify and rank developers’ preferred types of Source Code Entities (SCEs), e.g., domain vs. implementation-level source code terms and class names vs. method names. Second, we perform another eye-tracking case study to confirm that it is the semantic content of the developers’ preferred types of SCEs and not their locations that attract developers’ attention and help them in their task to verify RT links. Third, we propose an improved term weighting scheme, i.e., Developers Preferred Term Frequency/Inverse Document Frequency (DPTF/IDF), that uses the knowledge of the developers’ preferred types of SCEs to give more importance to these SCEs into the term weighting scheme. We integrate this weighting scheme with an IR technique, i.e., Latent Semantic Indexing (LSI), to create a new technique to RT link recovery. Using three systems (iTrust, Lucene, and Pooka), we show that the proposed technique statistically improves the accuracy of the recovered RT links over a technique based on LSI and the usual Term Frequency/Inverse Document Frequency (TF/IDF) weighting scheme. Finally, we compare the newly proposed DPTF/IDF with our original Domain Or Implementation/Inverse Document Frequency (DOI/IDF) weighting scheme.
  3. [3]V. Arnaoudova, M. D. Penta, and G. Antoniol, “Linguistic Antipatterns: What They are and How Developers Perceive Them,” Empirical Software Engineering (EMSE), pp. 104–158, Jan. 2015, [Online]. Available at: /wp-content/uploads/2014/10/2014-EMSE-Arnaodova-et-al-Perception-LAs.pdf.
    Bibtex
      @article{LAsPerception-15,
      title = {Linguistic Antipatterns: What They are and How Developers Perceive Them},
      author = {Arnaoudova, Venera and Penta, Massimiliano Di and Antoniol, Giuliano},
      url = {/wp-content/uploads/2014/10/2014-EMSE-Arnaodova-et-al-Perception-LAs.pdf},
      year = {2015},
      date = {2015-01-29},
      journal = {Empirical Software Engineering (EMSE)},
      pages = {104-158},
      keywords = {developers' perception, empirical study, linguistic antipatterns, source code identifiers}
    }
    
      
    Abstract Antipatterns are known as poor solutions to recurring problems. For example, Brown et al. and Fowler define practices concerning poor design or implementation solutions. However, we know that the source code lexicon is part of the factors that affect the psychological complexity of a program, i.e., factors that make a program difficult to understand and maintain by humans. The aim of this work is to identify recurring poor practices related to inconsistencies among the naming, documentation, and implementation of an entity—called Linguistic Antipatterns (LAs)—that may impair program understanding. To this end, we first mine examples of such inconsistencies in real open-source projects and abstract them into a catalog of 17 recurring LAs related to methods and attributes. Then, to understand the relevancy of LAs, we perform two empirical studies with developers—30 external (i.e., not familiar with the code) and 14 internal (i.e., people developing or maintaining the code). Results indicate that the majority of the participants perceive LAs as poor practices and therefore must be avoided—69% and 51% of the external and internal developers, respectively. As further evidence of LAs’ validity, open source developers that were made aware of LAs reacted to the issue by making code changes in 10% of the cases. Finally, in order to facilitate the use of LAs in practice, we identified a sub-set of LAs which were universally agreed upon as being problematic; those which had a clear dissonance between code behavior and lexicon.
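    Two invented Java methods in the spirit of the catalog (illustrations of the general idea only, not entries quoted from the paper's 17 LAs): the names promise one thing while the implementation does another.

      // Invented examples of naming/implementation inconsistencies; not taken from
      // the paper's catalog of Linguistic Antipatterns.
      import java.util.ArrayList;
      import java.util.List;

      class NamingInconsistencySketch {
          private final List<String> users = new ArrayList<>();

          // The name suggests a plain accessor, but the method also mutates state.
          List<String> getUsers() {
              if (users.isEmpty()) {
                  users.add("admin"); // hidden side effect behind a "get" name
              }
              return users;
          }

          // The "is" prefix suggests a boolean answer, but the method returns an int.
          int isEmptyUserList() {
              return users.isEmpty() ? 1 : 0;
          }
      }
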
  4. [4]S. Panichella, V. Arnaoudova, M. D. Penta, and G. Antoniol, “Would Static Analysis Tools Help Developers with Code Reviews?,” in International Conference on Software Analysis, Evolution, and Reengineering (SANER), Jan. 2015, pp. 161–170.
    Bibtex
      @inproceedings{Panichella:saner15:CodeReviewsWarnings,
      title = {Would Static Analysis Tools Help Developers with Code Reviews?},
      author = {Panichella, Sebastiano and Arnaoudova, Venera and Penta, Massimiliano Di and Antoniol, Giuliano},
      year = {2015},
      date = {2015-01-01},
      booktitle = {International Conference on Software Analysis, Evolution, and Reengineering (SANER)},
      pages = {161-170}
    }
    
      
    Abstract Code reviews have been conducted since decades in software projects, with the aim of improving code quality from many different points of view. During code reviews, developers are supported by checklists, coding standards and, possibly, by various kinds of static analysis tools. This paper investigates whether warnings highlighted by static analysis tools are taken care of during code reviews and, whether there are kinds of warnings that tend to be removed more than others. Results of a study conducted by mining the Gerrit repository of six Java open source projects indicate that the density of warnings only slightly vary after each review. The overall percentage of warnings removed during reviews is slightly higher than what previous studies found for the overall project evolution history. However, when looking (quantitatively and qualitatively) at specific categories of warnings, we found that during code reviews developers focus on certain kinds of problems. For such categories of warnings the removal percentage tend to be very high, often above 50% and sometimes up to 100%. Examples of those are warnings in the imports, regular expressions, and type resolution categories. In conclusion, while a broad warning detection might produce way too many false positives, enforcing the removal of certain warnings prior to the patch submission could reduce the amount of effort provided during the code review process.
  5. [5]L. M. Eshkevari, F. D. Santos, J. R. Cordy, and G. Antoniol, “Are PHP applications ready for Hack,” in International Conference on Software Analysis, Evolution, and Reengineering (SANER), Jan. 2015, pp. 63–72.
    Bibtex
      @inproceedings{laleh2015,
      title = {Are PHP applications ready for Hack},
      author = {Eshkevari, Laleh Mousavi and Santos, Fabien Dos and Cordy, James R. and Antoniol, Giuliano},
      year = {2015},
      date = {2015-01-01},
      booktitle = {International Conference on Software Analysis, Evolution, and Reengineering (SANER)},
      pages = {63-72}
    }
    
      
    Abstract PHP is by far the most popular WEB scripting language, accounting for more than 80% of existing websites. PHP is dynamically typed, which means that variables take on the type of the objects that they are assigned, and may change type as execution proceeds. While some type changes are likely not harmful, others involving function calls and global variables may be more difficult to understand and the source of many bugs. Hack, a new PHP variant endorsed by Facebook, attempts to address this problem by adding static typing to PHP variables, which limits them to a single consistent type throughout execution. This paper defines an empirical taxonomy of PHP type changes along three dimensions: the complexity or burden imposed to understand the type change; whether or not the change is potentially harmful; and the actual types changed. We apply static and dynamic analyses to three widely used WEB applications coded in PHP (WordPress, Drupal and phpBB) to investigate (1) to what extent developers really use dynamic typing, (2) what kinds of type changes are actually encountered; and (3) how difficult it might be to refactor the code to avoid type changes, and thus meet the constraints of Hack’s static typing. We report evidence that dynamic typing is actually a relatively uncommon practice in production PHP programs, and that most dynamic type changes are simple representational changes, such as between strings and integers. We observe that most PHP type changes in these programs are relatively simple, and that the largest proportion of them are easy to refactor to consistent static typing using simple local renaming transformations. Overall, the paper casts doubt on the usefulness of dynamic typing in PHP, and indicates that for many production applications, conversion to Hack’s static typing may not be very difficult.

2014

  1. [1]V. Arnaoudova, L. M. Eshkevari, M. D. Penta, R. Oliveto, G. Antoniol, and Y.-G. Guéhéneuc, “REPENT: Analyzing the Nature of Identifier Renamings,” IEEE Trans. Software Eng., vol. 40, no. 5, pp. 502–532, 2014.
    Bibtex
      @article{journals/tse/ArnaoudovaEPOAG14,
      author = {Arnaoudova, Venera and Eshkevari, Laleh Mousavi and Penta, Massimiliano Di and Oliveto, Rocco and Antoniol, Giuliano and Gu{\'e}h{\'e}neuc, Yann-Ga{\"e}l},
      title = {REPENT: Analyzing the Nature of Identifier Renamings},
      journal = {IEEE Trans. Software Eng.},
      volume = {40},
      number = {5},
      year = {2014},
      pages = {502-532},
      ee = {http://doi.ieeecomputersociety.org/10.1109/TSE.2014.2312942},
      bibsource = {DBLP, http://dblp.uni-trier.de}
    }
    
      
  2. [2]S. Medini, V. Arnaoudova, M. D. Penta, G. Antoniol, Y.-G. Guéhéneuc, and P. Tonella, “SCAN: An Approach to Label and Relate Execution Trace Segments,” Journal of Software: Evolution and Process (JSEP), vol. 26, no. 11, pp. 962–995, Jan. 2014.
    Bibtex
      @article{SCAN-14,
      title = {SCAN: An Approach to Label and Relate Execution Trace Segments},
      author = {Medini, Soumaya and Arnaoudova, Venera and Penta, Massimiliano Di and Antoniol, Giuliano and Guéhéneuc, Yann-Gaël and Tonella, Paolo},
      year = {2014},
      date = {2014-01-01},
      journal = {Journal of Software: Evolution and Process (JSEP)},
      volume = {26},
      number = {11},
      pages = {962--995}
    }
    
      
    Abstract Program comprehension is a prerequisite to any maintenance and evolution task. In particular, when performing feature location, developers perform program comprehension by abstracting software features and identifying the links between high-level abstractions (features) and program elements. We present Segment Concept AssigNer (SCAN), an approach to support developers in feature location. SCAN uses a search-based approach to split execution traces into cohesive segments. Then, it labels the segments with relevant keywords and, finally, uses formal concept analysis to identify relations among segments. In a first study, we evaluate the performance of SCAN on six Java programs with 31 participants. We report an average precision of 69% and a recall of 63% when comparing the manual and automatic labels and a precision of 63% regarding the relations among segments identified by SCAN. After that, we evaluate the usefulness of SCAN for the purpose of feature location on two Java programs. We provide evidence that SCAN (i) identifies 69% of the gold set methods and (ii) is effective in reducing the quantity of information that developers must process to locate features, reducing the number of methods to understand by an average of 43% compared to the entire execution traces.
  3. [3]B. De Smet, L. Lempereur, Z. Sharafi, Y.-G. Guéhéneuc, G. Antoniol, and N. Habra, “Taupe: Visualizing and analyzing eye-tracking data,” Sci. Comput. Program., vol. 79, pp. 260–278, 2014.
    Bibtex
      @article{1s20S0167642312000135main,
      author = {Smet, Beno\^{\i}t De and Lempereur, Lorent and Sharafi, Zohreh and Gu{\'e}h{\'e}neuc, Yann-Ga{\"e}l and Antoniol, Giuliano and Habra, Naji},
      title = {Taupe: Visualizing and analyzing eye-tracking data},
      journal = {Sci. Comput. Program.},
      volume = {79},
      year = {2014},
      pages = {260-278},
      ee = {http://dx.doi.org/10.1016/j.scico.2012.01.004},
      bibsource = {DBLP, http://dblp.uni-trier.de},
      pdf = {2014/1s20S0167642312000135main.pdf}
    }
    
      
    Abstract Program comprehension is an essential part of any maintenance activity. It allows developers to build mental models of the program before undertaking any change. It has been studied by the research community for many years with the aim to devise models and tools to understand and ease this activity. Recently, researchers have introduced the use of eye-tracking devices to gather and analyze data about the developers’ cognitive processes during program comprehension. However, eye-tracking devices are not completely reliable and, thus, recorded data sometimes must be processed, filtered, or corrected. Moreover, the analysis software tools packaged with eye-tracking devices are not open-source and do not always provide extension points to seamlessly integrate new sophisticated analyses. Consequently, we develop the Taupe software system to help researchers visualize, analyze, and edit the data recorded by eye-tracking devices. The two main objectives of Taupe are compatibility and extensibility so that researchers can easily: (1) apply the system on any eye-tracking data and (2) extend the system with their own analyses. To meet our objectives, we base the development of Taupe: (1) on well-known good practices, such as design patterns and a plug-in architecture using reflection, (2) on a thorough documentation, validation, and verification process, and (3) on lessons learned from existing analysis software systems. This paper describes the context of development of Taupe, the architectural and design choices made during its development, and its documentation, validation and verification process. It also illustrates the application of Taupe in three experiments on the use of design patterns by developers during program comprehension.
  4. [4]S. Panichella, G. Bavota, M. D. Penta, G. Canfora, and G. Antoniol, “How Developers’ Collaborations Identified from Different Sources Tell Us about Code Changes,” in 30th IEEE International Conference on Software Maintenance and Evolution, Victoria, BC, Canada, September 29 - October 3, 2014, 2014, pp. 251–260, . doi
    Bibtex
      @inproceedings{conf/icsm/PanichellaBPCA14,
      author = {Panichella, Sebastiano and Bavota, Gabriele and Penta, Massimiliano Di and Canfora, Gerardo and Antoniol, Giuliano},
      title = {How Developers' Collaborations Identified from Different Sources Tell Us about Code Changes},
      booktitle = {30th IEEE International Conference on Software Maintenance and Evolution, Victoria, BC, Canada, September 29 - October 3, 2014},
      pages = {251--260},
      year = {2014},
      url = {http://dx.doi.org/10.1109/ICSME.2014.47},
      doi = {10.1109/ICSME.2014.47}
    }
    
      
    Abstract Written communications recorded through channels such as mailing lists or issue trackers, but also code co-changes, have been used to identify emerging collaborations in software projects. Also, such data has been used to identify the relation between developers’ roles in communication networks and source code changes, or to identify mentors aiding newcomers to evolve the software project. However, results of such analyses may be different depending on the communication channel being mined. This paper investigates how collaboration links vary and complement each other when they are identified through data from three different kinds of communication channels, i.e., mailing lists, issue trackers, and IRC chat logs. Also, the study investigates how such links overlap with links mined from code changes, and how the use of different sources would influence (i) the identification of project mentors, and (ii) the presence of a correlation between the social role of a developer and her changes. Results of a study conducted on seven open source projects indicate that the overlap of communication links between the various sources is relatively low, and that the application of networks obtained from different sources may lead to different results.
  5. [5]L. Guerrouj, M. D. Penta, Y. G. Guéhéneuc, and G. Antoniol, “An experimental investigation on the effects of context on source code identifiers splitting and expansion,” Empirical Software Engineering, vol. 19, no. 6, pp. 1706–1753, 2014, . doi
    Bibtex
      @article{journals/ese/GuerroujPGA14,
      author = {Guerrouj, Latifa and Penta, Massimiliano Di and Gu{\'{e}}h{\'{e}}neuc, Yann Ga{\"{e}}l and Antoniol, Giuliano},
      title = {An experimental investigation on the effects of context on source code identifiers splitting and expansion},
      journal = {Empirical Software Engineering},
      volume = {19},
      number = {6},
      pages = {1706--1753},
      year = {2014},
      url = {http://dx.doi.org/10.1007/s10664-013-9260-1},
      doi = {10.1007/s10664-013-9260-1}
    }
    
      
    Abstract Recent and past studies indicate that source code lexicon plays an important role in program comprehension. Developers often compose source code identifiers with abbreviated words and acronyms, and do not always use consistent mechanisms and explicit separators when creating identifiers. Such choices and inconsistencies impede the work of developers that must understand identifiers by decomposing them into their component terms, and mapping them onto dictionary, application or domain words. When software documentation is scarce, outdated or simply not available, developers must therefore use the available contextual information to understand the source code. This paper aims at investigating how developers split and expand source code identifiers, and, specifically, the extent to which different kinds of contextual information could support such a task. In particular, we consider (i) an internal context consisting of the content of functions and source code files in which the identifiers are located, and (ii) an external context involving external documentation. We conducted a family of two experiments with 63 participants, including bachelor, master, Ph.D. students, and post-docs. We randomly sampled a set of 50 identifiers from a corpus of open source C programs and we asked participants to split and expand them with the availability (or not) of internal and external contexts. We report evidence on the usefulness of contextual information for identifier splitting and acronym/abbreviation expansion. We observe that the source code files are more helpful than just looking at function source code, and that the application-level contextual information does not help any further. The availability of external sources of information only helps in some circumstances. Also, in some cases, we observe that participants better expanded acronyms than abbreviations, although in most cases both exhibit the same level of accuracy. Finally, results indicated that the knowledge of English plays a significant effect in identifier splitting/expansion. The obtained results confirm the conjecture that contextual information is useful in program comprehension, including when developers split and expand identifiers to understand them. We hypothesize that the integration of identifier splitting and expansion tools with IDE could help to improve developers’ productivity.
  6. [6]L. M. Eshkevari, G. Antoniol, J. R. Cordy, and M. D. Penta, “Identifying and locating interference issues in PHP applications: the case of WordPress,” in ICPC, 2014, pp. 157–167.
    Bibtex
      @inproceedings{conf/iwpc/EshkevariACP14,
      author = {Eshkevari, Laleh Mousavi and Antoniol, Giuliano and Cordy, James R. and Penta, Massimiliano Di},
      title = {Identifying and locating interference issues in PHP applications: the case of WordPress},
      booktitle = {ICPC},
      year = {2014},
      pages = {157-167},
      ee = {http://doi.acm.org/10.1145/2597008.2597153},
      crossref = {DBLP:conf/iwpc/2014},
      bibsource = {DBLP, http://dblp.uni-trier.de}
    }
    
      
    Abstract The large success of Content Management Systems (CMS) such as WordPress is largely due to the rich ecosystem of themes and plugins developed around the CMS that allows users to easily build and customize complex Web applications featuring photo galleries, contact forms, and blog pages. However, the design of the CMS, the plugin-based architecture, and the implicit characteristics of the programming language used to develop them (often PHP), can cause interference or unwanted side effects between the resources declared and used by different plugins. This paper describes the problem of interference between plugins in CMS, specifically those developed using PHP, and outlines an approach combining static and dynamic analysis to detect and locate such interference. Results of a case study conducted over 10 WordPress plugins show that the analysis can help to identify and locate plugin interference, and thus be used to enhance CMS quality assurance.
  7. [7]G. Bavota, R. Oliveto, A. D. Lucia, A. Marcus, Y.-G. Guéhéneuc, and G. Antoniol, “In medio stat virtus: Extract class refactoring through nash equilibria,” in CSMR-WCRE, 2014, pp. 214–223.
    Bibtex
      @inproceedings{conf/csmr/BavotaOLMGA14,
      author = {Bavota, Gabriele and Oliveto, Rocco and Lucia, Andrea De and Marcus, Andrian and Gu{\'e}h{\'e}neuc, Yann-Ga{\"e}l and Antoniol, Giuliano},
      title = {In medio stat virtus: Extract class refactoring through nash equilibria},
      booktitle = {CSMR-WCRE},
      year = {2014},
      pages = {214-223},
      ee = {http://dx.doi.org/10.1109/CSMR-WCRE.2014.6747173},
      crossref = {DBLP:conf/csmr/2014},
      bibsource = {DBLP, http://dblp.uni-trier.de}
    }
    
      
  8. [8]S. Kpodjedo, P. Galinier, and G. Antoniol, “Using local similarity measures to efficiently address approximate graph matching,” Discrete Applied Mathematics, vol. 164, pp. 161–177, 2014.
    Bibtex
      @article{journals/dam/KpodjedoGA14,
      author = {Kpodjedo, Segla and Galinier, Philippe and Antoniol, Giuliano},
      title = {Using local similarity measures to efficiently address approximate graph matching},
      journal = {Discrete Applied Mathematics},
      volume = {164},
      year = {2014},
      pages = {161-177},
      ee = {http://dx.doi.org/10.1016/j.dam.2012.01.019},
      bibsource = {DBLP, http://dblp.uni-trier.de}
    }
    
      
  9. [9]F. Jaafar, Y.-G. Guéhéneuc, S. Hamel, and G. Antoniol, “Detecting asynchrony and dephase change patterns by mining software repositories,” Journal of Software: Evolution and Process, vol. 26, no. 1, pp. 77–106, 2014.
    Bibtex
      @article{journals/smr/JaafarGHA14,
      author = {Jaafar, Fehmi and Gu{\'e}h{\'e}neuc, Yann-Ga{\"e}l and Hamel, Sylvie and Antoniol, Giuliano},
      title = {Detecting asynchrony and dephase change patterns by mining software repositories},
      journal = {Journal of Software: Evolution and Process},
      volume = {26},
      number = {1},
      year = {2014},
      pages = {77-106},
      ee = {http://dx.doi.org/10.1002/smr.1635},
      bibsource = {DBLP, http://dblp.uni-trier.de}
    }
    
      
  10. [10]W. Wu, A. Serveaux, Y. G. Guéhéneuc, and G. Antoniol, “The Impact of Imperfect Change Rules on Framework API Evolution Identification: An Empirical Study,” Empirical Software Engineering, vol. 20, pp. 1126–1158, 2014.
    Bibtex
      @article{Wei2014emse,
      author = {Wu, Wei and Serveaux, Adrien and Gu{\'{e}}h{\'{e}}neuc, Yann Ga{\"{e}}l and Antoniol, Giuliano},
      title = {The Impact of Imperfect Change Rules on Framework API Evolution Identification: An Empirical Study},
      journal = {Empirical Software Engineering},
      volume = {20},
      number = {},
      pages = {1126--1158},
      year = {2014}
    }
    
      
    Abstract Software frameworks keep evolving. It is often time-consuming for developers to keep their client code up-to-date. Not all frameworks have documentation about the upgrading process. Many approaches have been proposed to ease the impact of non-documented framework evolution on developers by identifying change rules between two releases of a framework, but these change rules are imperfect, i.e., not 100% correct. To the best of our knowledge, there is no empirical study to show the usefulness of these imperfect change rules. Therefore, we design and conduct an experiment to evaluate their impact. In the experiment, the subjects must find the replacements of 21 missing methods in the new releases of three open-source frameworks with the help of (1) all-correct, (2) imperfect, and (3) no change rules. The statistical analysis results show that the precision of the replacements found by the subjects with the three sets of change rules is significantly different. The precision with all-correct change rules is the highest while that with no change rules is the lowest, while imperfect change rules give a precision in between. The effect size of the difference between the subjects with no and imperfect change rules is large and that between the subjects with imperfect and correct change rules is moderate. The results of this study show that the change rules generated by framework API evolution approaches do help developers, even if they are not always correct. The imperfect change rules can be used by developers upgrading their code when documentation is not available or as a complement to partial documentation. The moderate difference between results from subjects with imperfect and all-correct change rules also suggests that improving the precision of change rules will still help developers.

Older papers

  1. [1]N. Ali, Y.-G. Guéhéneuc, and G. Antoniol, “Trustrace: Mining Software Repositories to Improve the Accuracy of Requirement Traceability Links,” IEEE Trans. Software Eng., vol. 39, no. 5, pp. 725–741, 2013.
    Bibtex
      @article{06341764,
      author = {Ali, Nasir and Gu{\'e}h{\'e}neuc, Yann-Ga{\"e}l and Antoniol, Giuliano},
      title = {Trustrace: Mining Software Repositories to Improve the Accuracy of Requirement Traceability Links},
      journal = {IEEE Trans. Software Eng.},
      volume = {39},
      number = {5},
      year = {2013},
      pages = {725-741},
      ee = {http://doi.ieeecomputersociety.org/10.1109/TSE.2012.71},
      bibsource = {DBLP, http://dblp.uni-trier.de},
      pdf = {2013/06341764.pdf}
    }
    
      
    Abstract Traceability is the only means to ensure that the source code of a system is consistent with its requirements and that all and only the specified requirements have been implemented by developers. During software maintenance and evolution, requirement traceability links become obsolete because developers do not/cannot devote effort to update them. Yet, recovering these traceability links later is a daunting and costly task for developers. Consequently, the literature proposed methods, techniques, and tools to recover these traceability links semi-automatically or automatically. Among the proposed techniques, the literature showed that information retrieval (IR) techniques can automatically recover traceability links between free-text requirements and source code. However, IR techniques lack accuracy (precision and recall). In this paper, we show that mining software repositories and combining mined results with IR techniques can improve the accuracy (precision and recall) of IR techniques and we propose Trustrace, a trust-based traceability recovery approach. We apply Trustrace on four medium-size open-source systems to compare the accuracy of its traceability links with those recovered using state-of-the-art IR techniques from the literature, based on the Vector Space Model and Jensen–Shannon model. The results of Trustrace are up to 22.7% more precise and have 7.66% better recall values than those of the other techniques, on average. We thus show that mining software repositories and combining the mined data with existing results from IR techniques improves the precision and recall of requirement traceability links.
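    Code sketch The entry above builds on Vector Space Model (VSM) traceability recovery based on information retrieval. The following minimal Python sketch is only an illustration of that IR baseline, not Trustrace itself; the requirement and class texts are hypothetical. It computes TF-IDF vectors and ranks candidate requirement-to-code links by cosine similarity, producing the kind of ranked list that an approach like Trustrace would then re-weight using additional experts such as mined repository data.
      # Minimal VSM sketch (illustrative, hypothetical data): TF-IDF + cosine
      # similarity produces a ranked list of candidate traceability links.
      import math
      from collections import Counter

      def tokenize(text):
          return ''.join(c.lower() if c.isalnum() else ' ' for c in text).split()

      def tfidf_vectors(docs):
          tokenized = [Counter(tokenize(d)) for d in docs]
          n = len(docs)
          df = Counter()
          for counts in tokenized:
              df.update(counts.keys())
          idf = {t: math.log(n / df[t]) for t in df}
          return [{t: c * idf[t] for t, c in counts.items()} for counts in tokenized]

      def cosine(u, v):
          dot = sum(u[t] * v[t] for t in set(u) & set(v))
          nu = math.sqrt(sum(x * x for x in u.values()))
          nv = math.sqrt(sum(x * x for x in v.values()))
          return dot / (nu * nv) if nu and nv else 0.0

      # Hypothetical artifacts: one requirement and two classes (identifiers/comments).
      requirements = {"R1": "the user shall log in with a password"}
      classes = {"LoginController": "login controller checks user password credentials",
                 "ReportPrinter": "prints the monthly usage report"}

      vecs = tfidf_vectors(list(requirements.values()) + list(classes.values()))
      req_vecs = dict(zip(requirements, vecs[:len(requirements)]))
      cls_vecs = dict(zip(classes, vecs[len(requirements):]))
      for req, rv in req_vecs.items():
          ranked = sorted(((cosine(rv, cv), cls) for cls, cv in cls_vecs.items()), reverse=True)
          print(req, ranked)  # candidate links, highest similarity first; pruning and validation follow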
  2. [2]M. Pinzger and G. Antoniol, “Guest editorial: reverse engineering,” Empirical Software Engineering, vol. 18, no. 5, pp. 857–858, 2013.
    Bibtex
      @article{journals/ese/PinzgerA13,
      author = {Pinzger, Martin and Antoniol, Giuliano},
      title = {Guest editorial: reverse engineering},
      journal = {Empirical Software Engineering},
      volume = {18},
      number = {5},
      year = {2013},
      pages = {857-858},
      ee = {http://dx.doi.org/10.1007/s10664-012-9237-5},
      bibsource = {DBLP, http://dblp.uni-trier.de}
    }
    
      
  3. [3]A. Sabane, M. D. Penta, G. Antoniol, and Y.-G. Guéhéneuc, “A Study on the Relation between Antipatterns and the Cost of Class Unit Testing,” in CSMR, 2013, pp. 167–176.
    Bibtex
      @inproceedings{06498465,
      author = {Sabane, Aminata and Penta, Massimiliano Di and Antoniol, Giuliano and Gu{\'e}h{\'e}neuc, Yann-Ga{\"e}l},
      title = {A Study on the Relation between Antipatterns and the Cost of Class Unit Testing},
      booktitle = {CSMR},
      year = {2013},
      pages = {167-176},
      ee = {http://dx.doi.org/10.1109/CSMR.2013.26, http://doi.ieeecomputersociety.org/10.1109/CSMR.2013.26},
      crossref = {DBLP:conf/csmr/2013},
      bibsource = {DBLP, http://dblp.uni-trier.de},
      pdf = {2013/06498465.pdf}
    }
    
      
    Abstract Antipatterns are known as recurring, poor design choices; recent and past studies indicated that they negatively affect software systems in terms of understandability and maintainability, also increasing change- and defect-proneness. For this reason, refactoring actions are often suggested. In this paper, we investigate a different side-effect of antipatterns, which is their effect on testability and on testing cost in particular. We consider as (upper bound) indicator of testing cost the number of test cases that satisfy the minimal data member usage matrix (MaDUM) criterion proposed by Bashir and Goel. A study carried out on four Java programs (Ant 1.8.3, ArgoUML 0.20, Checkstyle 4.0, and JFreeChart 1.0.13) supports the evidence that, on the one hand, unit testing of antipattern classes requires, on average, a number of test cases substantially higher than unit testing of non-antipattern classes. On the other hand, antipattern classes must be carefully tested because they are more defect-prone than other classes. Finally, we illustrate how specific refactoring actions, applied to classes participating in antipatterns, could reduce testing cost.
  4. [4]G. Antoniol and K. B. Gallagher, “Preface to the special issue on program comprehension,” Empirical Software Engineering, vol. 18, no. 2, pp. 177–180, 2013.
    Bibtex
      @article{journals/ese/AntoniolG13,
      author = {Antoniol, Giuliano and Gallagher, Keith B.},
      title = {Preface to the special issue on program comprehension},
      journal = {Empirical Software Engineering},
      volume = {18},
      number = {2},
      year = {2013},
      pages = {177-180},
      ee = {http://dx.doi.org/10.1007/s10664-012-9212-1},
      bibsource = {DBLP, http://dblp.uni-trier.de}
    }
    
      
  5. [5]Z. Soh, F. Khomh, Y.-G. Guéhéneuc, G. Antoniol, and B. Adams, “On the effect of program exploration on maintenance tasks,” in WCRE, 2013, pp. 391–400.
    Bibtex
      @inproceedings{conf/wcre/SohKGAA13,
      author = {Soh, Z{\'e}phyrin and Khomh, Foutse and Gu{\'e}h{\'e}neuc, Yann-Ga{\"e}l and Antoniol, Giuliano and Adams, Bram},
      title = {On the effect of program exploration on maintenance tasks},
      booktitle = {WCRE},
      year = {2013},
      pages = {391-400},
      ee = {http://doi.ieeecomputersociety.org/10.1109/WCRE.2013.6671314},
      crossref = {DBLP:conf/wcre/2013},
      bibsource = {DBLP, http://dblp.uni-trier.de}
    }
    
      
  6. [6]Z. Sharafi, A. Marchetto, A. Susi, G. Antoniol, and Y.-G. Guéhéneuc, “An empirical study on the efficiency of graphical vs. textual representations in requirements comprehension,” in ICPC, 2013, pp. 33–42.
    Bibtex
      @inproceedings{conf/iwpc/SharafiMSAG13,
      author = {Sharafi, Zohreh and Marchetto, Alessandro and Susi, Angelo and Antoniol, Giuliano and Gu{\'e}h{\'e}neuc, Yann-Ga{\"e}l},
      title = {An empirical study on the efficiency of graphical vs. textual representations in requirements comprehension},
      booktitle = {ICPC},
      year = {2013},
      pages = {33-42},
      ee = {http://doi.ieeecomputersociety.org/10.1109/ICPC.2013.6613831},
      crossref = {DBLP:conf/iwpc/2013},
      bibsource = {DBLP, http://dblp.uni-trier.de}
    }
    
      
  7. [7]L. Guerrouj, M. D. Penta, G. Antoniol, and Y.-G. Guéhéneuc, “TIDIER: an identifier splitting approach using speech recognition techniques,” Journal of Software: Evolution and Process, vol. 25, no. 6, pp. 575–599, 2013.
    Bibtex
      @article{journals/smr/GuerroujPAG13,
      author = {Guerrouj, Latifa and Penta, Massimiliano Di and Antoniol, Giuliano and Gu{\'e}h{\'e}neuc, Yann-Ga{\"e}l},
      title = {TIDIER: an identifier splitting approach using speech recognition techniques},
      journal = {Journal of Software: Evolution and Process},
      volume = {25},
      number = {6},
      year = {2013},
      pages = {575-599},
      ee = {http://dx.doi.org/10.1002/smr.539},
      bibsource = {DBLP, http://dblp.uni-trier.de}
    }
    
      
    Abstract The software engineering literature reports empirical evidence on the relation between various characteristics of a software system and its quality. Among other factors, recent studies have shown that a proper choice of identifiers influences understandability and maintainability. Indeed, identifiers are developers’ main source of information and guide their cognitive processes during program comprehension when high-level documentation is scarce or outdated and when source code is not sufficiently commented. This paper proposes a novel approach to recognize words composing source code identifiers. The approach is based on an adaptation of Dynamic Time Warping used to recognize words in continuous speech. The approach overcomes the limitations of existing identifier-splitting approaches when naming conventions (e.g. Camel Case) are not used or when identifiers contain abbreviations. We apply the approach on a sample of more than 1,000 identifiers extracted from 340 C programs and compare its results with a simple Camel Case splitter and with an implementation of an alternative identifier splitting approach, Samurai. Results indicate the capability of the novel approach: (i) to outperform the alternative ones, when using a dictionary augmented with domain knowledge or a contextual dictionary and (ii) to expand 48% of a set of selected abbreviations into dictionary words.
  8. [8]V. Arnaoudova, M. D. Penta, G. Antoniol, and Y.-G. Guéhéneuc, “A New Family of Software Anti-patterns: Linguistic Anti-patterns,” in CSMR, 2013, pp. 187–196.
    Bibtex
      @inproceedings{06498467,
      author = {Arnaoudova, Venera and Penta, Massimiliano Di and Antoniol, Giuliano and Gu{\'e}h{\'e}neuc, Yann-Ga{\"e}l},
      title = {A New Family of Software Anti-patterns: Linguistic Anti-patterns},
      booktitle = {CSMR},
      year = {2013},
      pages = {187-196},
      ee = {http://dx.doi.org/10.1109/CSMR.2013.28, http://doi.ieeecomputersociety.org/10.1109/CSMR.2013.28},
      crossref = {DBLP:conf/csmr/2013},
      bibsource = {DBLP, http://dblp.uni-trier.de},
      pdf = {2013/06498467.pdf}
    }
    
      
    Abstract Recent and past studies have shown that poor source code lexicon negatively affects software understandability, maintainability, and, overall, quality. Besides a poor usage of lexicon and documentation, sometimes a software artifact description is misleading with respect to its implementation. Consequently, developers will spend more time and effort when understanding these software artifacts, or even make wrong assumptions when they use them. This paper introduces the definition of software linguistic antipatterns, and defines a family of them, i.e., those related to inconsistencies (i) between method signatures, documentation, and behavior and (ii) between attribute names, types, and comments. Whereas "design" antipatterns represent recurring, poor design choices, linguistic antipatterns represent recurring, poor naming and commenting choices. The paper provides a first catalogue of one family of linguistic antipatterns, showing real examples of such antipatterns and explaining what kind of misunderstanding they can cause. Also, the paper proposes a detector prototype for Java programs called LAPD (Linguistic Anti-Pattern Detector), and reports a study investigating the presence of linguistic antipatterns in four Java software projects.
  9. [9]M. Leotta, F. Ricca, G. Antoniol, V. Garousi, J. Zhi, and G. Ruhe, “A Pilot Experiment to Quantify the Effect of Documentation Accuracy on Maintenance Tasks,” in ICSM, 2013, pp. 428–431.
    Bibtex
      @inproceedings{conf/icsm/LeottaRAGZR13,
      author = {Leotta, Maurizio and Ricca, Filippo and Antoniol, Giuliano and Garousi, Vahid and Zhi, Junji and Ruhe, G{\"u}nther},
      title = {A Pilot Experiment to Quantify the Effect of Documentation Accuracy on Maintenance Tasks},
      booktitle = {ICSM},
      year = {2013},
      pages = {428-431},
      ee = {http://dx.doi.org/10.1109/ICSM.2013.64},
      crossref = {DBLP:conf/icsm/2013},
      bibsource = {DBLP, http://dblp.uni-trier.de}
    }
    
      
    Abstract This paper reports the results and some challenges we discovered during the design and execution of a pilot experiment with 21 bachelor students aimed at investigating the effect of documentation accuracy during software maintenance and evolution activities. As documentation we considered: a high level system functionality description and UML documents. Preliminary results indicate a benefit of +15% in terms of efficiency (computed as number of correct tasks per minute) when a more accurate documentation is used. The discovered challenging aspects to carefully consider in future executions of the experiment are as follows: selecting "the right" documentation artefacts, maintenance tasks and documentation versions, verifying that the subjects really used the documentation during the experiment and measuring documentation-code alignment.
  10. [10]Z. Soh, F. Khomh, Y.-G. Guéhéneuc, and G. Antoniol, “Towards understanding how developers spend their effort during maintenance activities,” in WCRE, 2013, pp. 152–161.
    Bibtex
      @inproceedings{conf/wcre/SohKGA13,
      author = {Soh, Z{\'e}phyrin and Khomh, Foutse and Gu{\'e}h{\'e}neuc, Yann-Ga{\"e}l and Antoniol, Giuliano},
      title = {Towards understanding how developers spend their effort during maintenance activities},
      booktitle = {WCRE},
      year = {2013},
      pages = {152-161},
      ee = {http://doi.ieeecomputersociety.org/10.1109/WCRE.2013.6671290},
      crossref = {DBLP:conf/wcre/2013},
      bibsource = {DBLP, http://dblp.uni-trier.de}
    }
    
      
  11. [11]S. Kpodjedo, F. Ricca, P. Galinier, G. Antoniol, and Y.-G. Guéhéneuc, “MADMatch: Many-to-Many Approximate Diagram Matching for Design Comparison,” IEEE Trans. Software Eng., vol. 39, no. 8, pp. 1090–1111, 2013.
    Bibtex
      @article{06464271,
      author = {Kpodjedo, Segla and Ricca, Filippo and Galinier, Philippe and Antoniol, Giuliano and Gu{\'e}h{\'e}neuc, Yann-Ga{\"e}l},
      title = {MADMatch: Many-to-Many Approximate Diagram Matching for Design Comparison},
      journal = {IEEE Trans. Software Eng.},
      volume = {39},
      number = {8},
      year = {2013},
      pages = {1090-1111},
      ee = {http://doi.ieeecomputersociety.org/10.1109/TSE.2013.9},
      bibsource = {DBLP, http://dblp.uni-trier.de},
      pdf = {2013/06464271.pdf}
    }
    
      
    Abstract Matching algorithms play a fundamental role in many important but difficult software engineering activities, especially design evolution analysis and model comparison. We present MADMatch, a fast and scalable Many-to-many Approximate Diagram Matching approach based on an Error-Tolerant Graph matching (ETGM) formulation. Diagrams are represented as graphs, costs are assigned to possible differences between two given graphs, and the goal is to retrieve the cheapest matching. We address the resulting optimisation problem with a tabu search enhanced by the novel use of lexical and structural information. Through several case studies with different types of diagrams and tasks, we show that our generic approach obtains better results than dedicated state-of-the-art algorithms, such as AURA, PLTSDiff or UMLDiff, on the exact same datasets used to introduce (and evaluate) these algorithms.
  12. [12]S. Kpodjedo, F. Ricca, P. Galinier, G. Antoniol, and Y.-G. Guéhéneuc, “Studying software evolution of large object-oriented software systems using an ETGM algorithm,” Journal of Software: Evolution and Process, vol. 25, no. 2, pp. 139–163, 2013.
    Bibtex
      @article{journals/smr/KpodjedoRGAG13,
      author = {Kpodjedo, Segla and Ricca, Filippo and Galinier, Philippe and Antoniol, Giuliano and Gu{\'e}h{\'e}neuc, Yann-Ga{\"e}l},
      title = {Studying software evolution of large object-oriented software systems using an ETGM algorithm},
      journal = {Journal of Software: Evolution and Process},
      volume = {25},
      number = {2},
      year = {2013},
      pages = {139-163},
      ee = {http://dx.doi.org/10.1002/smr.519},
      bibsource = {DBLP, http://dblp.uni-trier.de}
    }
    
      
  13. [13]N. Bhattacharya et al., “Optimizing Threads Schedule Alignments to Expose the Interference Bug Pattern,” in SSBSE, 2012, pp. 90–104.
    Bibtex
      @inproceedings{conf/ssbse/BhattacharyaEDBADG12,
      author = {Bhattacharya, Neelesh and El-Mahi, Olfat and Duclos, Etienne and Beltrame, Giovanni and Antoniol, Giuliano and Digabel, S{\'e}bastien Le and Gu{\'e}h{\'e}neuc, Yann-Ga{\"e}l},
      title = {Optimizing Threads Schedule Alignments to Expose the Interference Bug Pattern},
      booktitle = {SSBSE},
      year = {2012},
      pages = {90-104},
      ee = {http://dx.doi.org/10.1007/978-3-642-33119-0_8},
      crossref = {DBLP:conf/ssbse/2012},
      bibsource = {DBLP, http://dblp.uni-trier.de}
    }
    
      
  14. [14]N. Ali, Z. Sharafi, Y.-G. Guéhéneuc, and G. Antoniol, “An empirical study on requirements traceability using eye-tracking,” in ICSM, 2012, pp. 191–200.
    Bibtex
      @inproceedings{06405271,
      author = {Ali, Nasir and Sharafi, Zohreh and Gu{\'e}h{\'e}neuc, Yann-Ga{\"e}l and Antoniol, Giuliano},
      title = {An empirical study on requirements traceability using eye-tracking},
      booktitle = {ICSM},
      year = {2012},
      pages = {191-200},
      ee = {http://doi.ieeecomputersociety.org/10.1109/ICSM.2012.6405271},
      crossref = {DBLP:conf/icsm/2012},
      bibsource = {DBLP, http://dblp.uni-trier.de},
      pdf = {2012/06405271.pdf}
    }
    
      
    Abstract Requirements traceability (RT) links help developers to understand programs and ensure that their source code is consistent with its documentation. Creating RT links is a laborious and resource-consuming task. Information Retrieval (IR) techniques are useful to automatically recover traceability links. However, IR-based approaches typically have low accuracy (precision and recall) and, thus, creating RT links remains a human intensive process. We conjecture that understanding how developers verify RT links could help improve the accuracy of IR-based approaches to recover RT links. Consequently, we perform an empirical study consisting of two controlled experiments. First, we use an eye-tracking system to capture developers’ eye movements while they verify RT links. We analyse the obtained data to identify and rank developers’ preferred source code entities (SCEs), e.g., class names, method names. Second, we use the ranked SCEs to propose two new weighting schemes called SE/IDF (source code entity/inverse document frequency) and DOI/IDF (domain or implementation/inverse document frequency) to recover RT links combined with an IR technique. SE/IDF is based on the developers’ preferred SCEs to verify RT links. DOI/IDF is an extension of SE/IDF distinguishing domain and implementation concepts. We use LSI combined with SE/IDF, DOI/IDF, and TF/IDF to show, using two systems, iTrust and Pooka, that LSI with DOI/IDF statistically improves the accuracy of the recovered RT links over LSI with TF/IDF.
  15. [15]N. Ali, A. Sabane, Y.-G. Guéhéneuc, and G. Antoniol, “Improving Bug Location Using Binary Class Relationships,” in SCAM, 2012, pp. 174–183.
    Bibtex
      @inproceedings{conf/scam/AliSGA12,
      author = {Ali, Nasir and Sabane, Aminata and Gu{\'e}h{\'e}neuc, Yann-Ga{\"e}l and Antoniol, Giuliano},
      title = {Improving Bug Location Using Binary Class Relationships},
      booktitle = {SCAM},
      year = {2012},
      pages = {174-183},
      ee = {http://doi.ieeecomputersociety.org/10.1109/SCAM.2012.26},
      crossref = {DBLP:conf/scam/2012},
      bibsource = {DBLP, http://dblp.uni-trier.de}
    }
    
      
  16. [16]L. Guerrouj, P. Galinier, Y.-G. Guéhéneuc, G. Antoniol, and M. D. Penta, “TRIS: A Fast and Accurate Identifiers Splitting and Expansion Algorithm,” in WCRE, 2012, pp. 103–112.
    Bibtex
      @inproceedings{conf/wcre/GuerroujGGAP12,
      author = {Guerrouj, Latifa and Galinier, Philippe and Gu{\'e}h{\'e}neuc, Yann-Ga{\"e}l and Antoniol, Giuliano and Penta, Massimiliano Di},
      title = {TRIS: A Fast and Accurate Identifiers Splitting and Expansion Algorithm},
      booktitle = {WCRE},
      year = {2012},
      pages = {103-112},
      ee = {http://doi.ieeecomputersociety.org/10.1109/WCRE.2012.20},
      crossref = {DBLP:conf/wcre/2012},
      bibsource = {DBLP, http://dblp.uni-trier.de}
    }
    
      
    Abstract Understanding source code identifiers, by identifying words composing them, is a necessary step for many program comprehension, reverse engineering, or redocumentation tasks. To this aim, researchers have proposed several identifier splitting and expansion approaches such as Samurai, TIDIER and more recently GenTest. The ultimate goal of such approaches is to help disambiguating conceptual information encoded in compound (or abbreviated) identifiers. This paper presents TRIS, TRee-based Identifier Splitter, a two-phase approach to split and expand program identifiers. TRIS takes as input a dictionary of words, the identifiers to split/expand, and the identifiers’ source code application. First, TRIS pre-compiles transformed dictionary words into a tree representation, associating a cost to each transformation. In a second phase, it maps the identifier splitting/expansion problem into a minimization problem, i.e., the search of the shortest path (optimal split/expansion) in a weighted graph. We apply TRIS to a sample of 974 identifiers extracted from JHotDraw, 3,085 from Lynx, and to a sample of 489 identifiers extracted from 340 C programs. Also, we compare TRIS with GenTest on a set of 2,663 mixed Java, C and C++ identifiers. We report evidence that TRIS splitting (and expansion) is more accurate than state-of-the-art approaches and that it is also efficient in terms of computation time.
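    Code sketch The entry above maps identifier splitting and expansion to a shortest-path minimization over transformed dictionary words. The following minimal Python sketch illustrates the same idea under toy assumptions (a tiny dictionary and plain edit distance as the transformation cost); it is not the authors' tree-based TRIS implementation. Dynamic programming picks the split whose chunks are cheapest to map onto dictionary words.
      # Illustrative identifier splitting/expansion by cost minimization.
      from functools import lru_cache

      DICTIONARY = {"get", "user", "name", "number", "pointer", "counter"}  # toy dictionary

      def levenshtein(a, b):
          # Classic edit distance, used here as the word-transformation cost.
          prev = list(range(len(b) + 1))
          for i, ca in enumerate(a, 1):
              cur = [i]
              for j, cb in enumerate(b, 1):
                  cur.append(min(prev[j] + 1, cur[j - 1] + 1, prev[j - 1] + (ca != cb)))
              prev = cur
          return prev[-1]

      def best_word(chunk):
          # Cheapest dictionary word explaining this chunk.
          return min((levenshtein(chunk, w), w) for w in DICTIONARY)

      def split_identifier(identifier):
          ident = identifier.lower()

          @lru_cache(maxsize=None)
          def solve(i):
              # Best (cost, words) covering ident[i:] with dictionary words.
              if i == len(ident):
                  return 0, ()
              best = (float("inf"), ())
              for j in range(i + 1, len(ident) + 1):
                  cost, word = best_word(ident[i:j])
                  rest_cost, rest = solve(j)
                  if cost + rest_cost < best[0]:
                      best = (cost + rest_cost, (word,) + rest)
              return best

          return solve(0)

      print(split_identifier("getUsrNam"))  # e.g. a low-cost split such as ('get', 'user', 'name')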
  17. [17]M. D. Penta, G. Antoniol, D. M. Germán, Y.-G. Guéhéneuc, and B. Adams, “Five days of empirical software engineering: The PASED experience,” in ICSE, 2012, pp. 1255–1258.
    Bibtex
      @inproceedings{conf/icse/PentaAGGA12,
      author = {Penta, Massimiliano Di and Antoniol, Giuliano and Germ{\'a}n, Daniel M. and Gu{\'e}h{\'e}neuc, Yann-Ga{\"e}l and Adams, Bram},
      title = {Five days of empirical software engineering: The PASED experience},
      booktitle = {ICSE},
      year = {2012},
      pages = {1255-1258},
      ee = {http://dx.doi.org/10.1109/ICSE.2012.6227017},
      crossref = {DBLP:conf/icse/2012},
      bibsource = {DBLP, http://dblp.uni-trier.de}
    }
    
      
    Abstract Acquiring the skills to plan and conduct different kinds of empirical studies is a mandatory requirement for graduate students working in the field of software engineering. These skills typically can only be developed based on the teaching and experience of the students’ supervisor, because of the lack of specific, practical courses providing these skills. To fill this gap, we organized the first Canadian Summer School on Practical Analyses of Software Engineering Data (PASED). The aim of PASED is to provide—using a “learning by doing” model of teaching—a solid foundation to software engineering graduate students on conducting empirical studies. This paper describes our experience in organizing the PASED school, i.e., what challenges we encountered, how we designed the lectures and laboratories, and what could be improved in the future based on the participants’ feedback.
  18. [18]S. L. Abebe, V. Arnaoudova, P. Tonella, G. Antoniol, and Y.-G. Guéhéneuc, “Can Lexicon Bad Smells Improve Fault Prediction?,” in WCRE, 2012, pp. 235–244.
    Bibtex
      @inproceedings{06385119,
      author = {Abebe, Surafel Lemma and Arnaoudova, Venera and Tonella, Paolo and Antoniol, Giuliano and Gu{\'e}h{\'e}neuc, Yann-Ga{\"e}l},
      title = {Can Lexicon Bad Smells Improve Fault Prediction?},
      booktitle = {WCRE},
      year = {2012},
      pages = {235-244},
      ee = {http://doi.ieeecomputersociety.org/10.1109/WCRE.2012.33},
      crossref = {DBLP:conf/wcre/2012},
      bibsource = {DBLP, http://dblp.uni-trier.de},
      pdf = {2012/06385119.pdf}
    }
    
      
    Abstract In software development, early identification of fault-prone classes can save a considerable amount of resources. In the literature, source code structural metrics have been widely investigated as one of the factors that can be used to identify faulty classes. Structural metrics measure code complexity, one aspect of the source code quality. Complexity might affect program understanding and hence increase the likelihood of inserting errors in a class. Besides the structural metrics, we believe that the quality of the identifiers used in the code may also affect program understanding and thus increase the likelihood of error insertion. In this study, we measure the quality of identifiers using the number of Lexicon Bad Smells (LBS) they contain. We investigate whether using LBS in addition to structural metrics improves fault prediction. To conduct the investigation, we assess the prediction capability of a model while using i) only structural metrics, and ii) structural metrics and LBS. The results on three open source systems, ArgoUML, Rhino, and Eclipse, indicate that there is an improvement in the majority of the cases.
  19. [19]O. Gotel et al., “The Grand Challenge of Traceability (v1.0),” in Software and Systems Traceability, 2012, pp. 343–409.
    Bibtex
      @incollection{books/daglib/p/GotelCHZEGDAM12,
      author = {Gotel, Orlena and Cleland-Huang, Jane and Hayes, Jane Huffman and Zisman, Andrea and Egyed, Alexander and Gr{\"u}nbacher, Paul and Dekhtyar, Alex and Antoniol, Giuliano and Maletic, Jonathan I.},
      title = {The Grand Challenge of Traceability (v1.0)},
      booktitle = {Software and Systems Traceability},
      year = {2012},
      pages = {343-409},
      ee = {http://dx.doi.org/10.1007/978-1-4471-2239-5_16},
      crossref = {DBLP:books/daglib/0028967},
      bibsource = {DBLP, http://dblp.uni-trier.de}
    }
    
      
  20. [20]N. Ali, Y.-G. Guéhéneuc, and G. Antoniol, “Factors Impacting the Inputs of Traceability Recovery Approaches,” in Software and Systems Traceability, 2012, pp. 99–127.
    Bibtex
      @incollection{chp3A1010072F97814471223955,
      author = {Ali, Nasir and Gu{\'e}h{\'e}neuc, Yann-Ga{\"e}l and Antoniol, Giuliano},
      title = {Factors Impacting the Inputs of Traceability Recovery Approaches},
      booktitle = {Software and Systems Traceability},
      year = {2012},
      pages = {99-127},
      ee = {http://dx.doi.org/10.1007/978-1-4471-2239-5_5},
      crossref = {DBLP:books/daglib/0028967},
      bibsource = {DBLP, http://dblp.uni-trier.de},
      pdf = {2012/chp3A1010072F97814471223955.pdf}
    }
    
      
  21. [21]S. Medini, G. Antoniol, Y.-G. Guéhéneuc, M. D. Penta, and P. Tonella, “SCAN: An Approach to Label and Relate Execution Trace Segments,” in WCRE, 2012, pp. 135–144.
    Bibtex
      @inproceedings{06385109,
      author = {Medini, Soumaya and Antoniol, Giuliano and Gu{\'e}h{\'e}neuc, Yann-Ga{\"e}l and Penta, Massimiliano Di and Tonella, Paolo},
      title = {SCAN: An Approach to Label and Relate Execution Trace Segments},
      booktitle = {WCRE},
      year = {2012},
      pages = {135-144},
      ee = {http://doi.ieeecomputersociety.org/10.1109/WCRE.2012.23},
      crossref = {DBLP:conf/wcre/2012},
      bibsource = {DBLP, http://dblp.uni-trier.de},
      pdf = {2012/06385109.pdf}
    }
    
      
    Abstract Identifying concepts in execution traces is a task often necessary to support program comprehension or maintenance activities. Several approaches (static, dynamic, or hybrid) have been proposed to identify cohesive, meaningful sequences of methods in execution traces. However, none of the proposed approaches is able to label such segments and to identify relations with other segments of the same trace. This paper presents SCAN (Segment Concept AssigNer), an approach to assign labels to sequences of methods in execution traces, and to identify relations between such segments. SCAN uses information retrieval methods and formal concept analysis to produce sets of words helping the developer to understand the concept implemented by a segment. Specifically, formal concept analysis allows SCAN to discover commonalities between segments in different trace areas, as well as terms more specific to a given segment and higher-level relations between segments. The paper describes SCAN along with a preliminary manual validation, on execution traces collected from usage scenarios of JHotDraw and ArgoUML, of SCAN's accuracy in assigning labels representative of concepts implemented by trace segments.
  22. [22]O. Gotel et al., “Traceability Fundamentals,” in Software and Systems Traceability, 2012, pp. 3–22.
    Bibtex
      @incollection{books/daglib/p/GotelCHZEGDAMM12,
      author = {Gotel, Orlena and Cleland-Huang, Jane and Hayes, Jane Huffman and Zisman, Andrea and Egyed, Alexander and Gr{\"u}nbacher, Paul and Dekhtyar, Alex and Antoniol, Giuliano and Maletic, Jonathan I. and M{\"a}der, Patrick},
      title = {Traceability Fundamentals},
      booktitle = {Software and Systems Traceability},
      year = {2012},
      pages = {3-22},
      ee = {http://dx.doi.org/10.1007/978-1-4471-2239-5_1},
      crossref = {DBLP:books/daglib/0028967},
      bibsource = {DBLP, http://dblp.uni-trier.de}
    }
    
      
  23. [23]A. Zaidman and G. Antoniol, “Preface to the special issue on reverse engineering (featuring selected papers from WCRE 2009),” Sci. Comput. Program., vol. 77, no. 6, pp. 741–742, 2012.
    Bibtex
      @article{journals/scp/ZaidmanA12,
      author = {Zaidman, Andy and Antoniol, Giuliano},
      title = {Preface to the special issue on reverse engineering (featuring selected papers from WCRE 2009)},
      journal = {Sci. Comput. Program.},
      volume = {77},
      number = {6},
      year = {2012},
      pages = {741-742},
      ee = {http://dx.doi.org/10.1016/j.scico.2011.01.008},
      bibsource = {DBLP, http://dblp.uni-trier.de}
    }
    
      
  24. [24]Z. Soh, Z. Sharafi, B. V. den Plas, G. C. Porras, Y.-G. Guéhéneuc, and G. Antoniol, “Professional status and expertise for UML class diagram comprehension: An empirical study,” in ICPC, 2012, pp. 163–172.
    Bibtex
      @inproceedings{conf/iwpc/SohSPPGA12,
      author = {Soh, Z{\'e}phyrin and Sharafi, Zohreh and den Plas, Bertrand Van and Porras, Gerardo Cepeda and Gu{\'e}h{\'e}neuc, Yann-Ga{\"e}l and Antoniol, Giuliano},
      title = {Professional status and expertise for UML class diagram comprehension: An empirical study},
      booktitle = {ICPC},
      year = {2012},
      pages = {163-172},
      ee = {http://dx.doi.org/10.1109/ICPC.2012.6240484},
      crossref = {DBLP:conf/iwpc/2012},
      bibsource = {DBLP, http://dblp.uni-trier.de}
    }
    
      
  25. [25]O. Gotel et al., “The quest for Ubiquity: A roadmap for software and systems traceability research,” in RE, 2012, pp. 71–80.
    Bibtex
      @inproceedings{conf/re/GotelCHZEGA12,
      author = {Gotel, Orlena and Cleland-Huang, Jane and Hayes, Jane Huffman and Zisman, Andrea and Egyed, Alexander and Gr{\"u}nbacher, Paul and Antoniol, Giuliano},
      title = {The quest for Ubiquity: A roadmap for software and systems traceability research},
      booktitle = {RE},
      year = {2012},
      pages = {71-80},
      ee = {http://dx.doi.org/10.1109/RE.2012.6345841},
      crossref = {DBLP:conf/re/2012},
      bibsource = {DBLP, http://dblp.uni-trier.de}
    }
    
      
  26. [26]A. Maiga et al., “Support vector machines for anti-pattern detection,” in ASE, 2012, pp. 278–281.
    Bibtex
      @inproceedings{conf/kbse/MaigaABSGAA12,
      author = {Maiga, Abdou and Ali, Nasir and Bhattacharya, Neelesh and Sabane, Aminata and Gu{\'e}h{\'e}neuc, Yann-Ga{\"e}l and Antoniol, Giuliano and A\"{\i}meur, Esma},
      title = {Support vector machines for anti-pattern detection},
      booktitle = {ASE},
      year = {2012},
      pages = {278-281},
      ee = {http://doi.acm.org/10.1145/2351676.2351723},
      crossref = {DBLP:conf/kbse/2012},
      bibsource = {DBLP, http://dblp.uni-trier.de}
    }
    
      
  27. [27]S. Hassaine, Y.-G. Guéhéneuc, S. Hamel, and G. Antoniol, “ADvISE: Architectural Decay in Software Evolution,” in CSMR, 2012, pp. 267–276.
    Bibtex
      @inproceedings{conf/csmr/HassaineGHA12,
      author = {Hassaine, Salima and Gu{\'e}h{\'e}neuc, Yann-Ga{\"e}l and Hamel, Sylvie and Antoniol, Giuliano},
      title = {ADvISE: Architectural Decay in Software Evolution},
      booktitle = {CSMR},
      year = {2012},
      pages = {267-276},
      ee = {http://dx.doi.org/10.1109/CSMR.2012.34, http://doi.ieeecomputersociety.org/10.1109/CSMR.2012.34},
      crossref = {DBLP:conf/csmr/2012},
      bibsource = {DBLP, http://dblp.uni-trier.de}
    }
    
      
  28. [28]G. Antoniol, A. Bertolino, and Y. Labiche, Eds., 2012 IEEE Fifth International Conference on Software Testing, Verification and Validation, Montreal, QC, Canada, April 17-21, 2012. IEEE, 2012.
    Bibtex
      @proceedings{conf/icst/2012,
      editor = {Antoniol, Giuliano and Bertolino, Antonia and Labiche, Yvan},
      title = {2012 IEEE Fifth International Conference on Software Testing, Verification and Validation, Montreal, QC, Canada, April 17-21, 2012},
      booktitle = {ICST},
      publisher = {IEEE},
      year = {2012},
      isbn = {978-1-4577-1906-6},
      ee = {http://ieeexplore.ieee.org/xpl/mostRecentIssue.jsp?punumber=6200016},
      bibsource = {DBLP, http://dblp.uni-trier.de}
    }
    
      
  29. [29]F. Khomh, M. D. Penta, Y.-G. Guéhéneuc, and G. Antoniol, “An exploratory study of the impact of antipatterns on class change- and fault-proneness,” Empirical Software Engineering, vol. 17, no. 3, pp. 243–275, 2012.
    Bibtex
      @article{art3A1010072Fs106640119171y,
      author = {Khomh, Foutse and Penta, Massimiliano Di and Gu{\'e}h{\'e}neuc, Yann-Ga{\"e}l and Antoniol, Giuliano},
      title = {An exploratory study of the impact of antipatterns on class change- and fault-proneness},
      journal = {Empirical Software Engineering},
      volume = {17},
      number = {3},
      year = {2012},
      pages = {243-275},
      ee = {http://dx.doi.org/10.1007/s10664-011-9171-y},
      bibsource = {DBLP, http://dblp.uni-trier.de},
      pdf = {2012/art3A1010072Fs106640119171y.pdf}
    }
    
      
    Abstract Antipatterns are poor design choices that are conjectured to make object-oriented systems harder to maintain. We investigate the impact of antipatterns on classes in object-oriented systems by studying the relation between the presence of antipatterns and the change- and fault-proneness of the classes. We detect 13 antipatterns in 54 releases of ArgoUML, Eclipse, Mylyn, and Rhino, and analyse (1) to what extent classes participating in antipatterns have higher odds to change or to be subject to fault-fixing than other classes, (2) to what extent these odds (if higher) are due to the sizes of the classes or to the presence of antipatterns, and (3) what kinds of changes affect classes participating in antipatterns. We show that, in almost all releases of the four systems, classes participating in antipatterns are more change- and fault-prone than others. We also show that size alone cannot explain the higher odds of classes with antipatterns to undergo a (fault-fixing) change than other classes. Finally, we show that structural changes affect more classes with antipatterns than others. We provide qualitative explanations of the increase of change- and fault-proneness in classes participating in antipatterns using release notes and bug reports. The obtained results justify a posteriori previous work on the specification and detection of antipatterns and could help to better focus quality assurance and testing activities.
  30. [30]Z. Sharafi, Z. Soh, Y.-G. Guéhéneuc, and G. Antoniol, “Women and men - Different but equal: On the impact of identifier style on source code reading,” in ICPC, 2012, pp. 27–36.
    Bibtex
      @inproceedings{conf/iwpc/SharafiSGA12,
      author = {Sharafi, Zohreh and Soh, Z{\'e}phyrin and Gu{\'e}h{\'e}neuc, Yann-Ga{\"e}l and Antoniol, Giuliano},
      title = {Women and men - Different but equal: On the impact of identifier style on source code reading},
      booktitle = {ICPC},
      year = {2012},
      pages = {27-36},
      ee = {http://dx.doi.org/10.1109/ICPC.2012.6240505},
      crossref = {DBLP:conf/iwpc/2012},
      bibsource = {DBLP, http://dblp.uni-trier.de}
    }
    
      
  31. [31]S. Hassaine, F. Boughanmi, Y.-G. Guéhéneuc, S. Hamel, and G. Antoniol, “A seismology-inspired approach to study change propagation,” in ICSM, 2011, pp. 53–62.
    Bibtex
      @inproceedings{conf/icsm/HassaineBGHA11,
      author = {Hassaine, Salima and Boughanmi, Ferdaous and Gu{\'e}h{\'e}neuc, Yann-Ga{\"e}l and Hamel, Sylvie and Antoniol, Giuliano},
      title = {A seismology-inspired approach to study change propagation},
      booktitle = {ICSM},
      year = {2011},
      pages = {53-62},
      ee = {http://dx.doi.org/10.1109/ICSM.2011.6080772},
      crossref = {DBLP:conf/icsm/2011},
      bibsource = {DBLP, http://dblp.uni-trier.de}
    }
    
      
  32. [32]S. Medini, P. Galinier, M. D. Penta, Y.-G. Guéhéneuc, and G. Antoniol, “A Fast Algorithm to Locate Concepts in Execution Traces,” in SSBSE, 2011, pp. 252–266.
    Bibtex
      @inproceedings{chp3A1010072F978364223716422,
      author = {Medini, Soumaya and Galinier, Philippe and Penta, Massimiliano Di and Gu{\'e}h{\'e}neuc, Yann-Ga{\"e}l and Antoniol, Giuliano},
      title = {A Fast Algorithm to Locate Concepts in Execution Traces},
      booktitle = {SSBSE},
      year = {2011},
      pages = {252-266},
      ee = {http://dx.doi.org/10.1007/978-3-642-23716-4_22},
      crossref = {DBLP:conf/ssbse/2011},
      bibsource = {DBLP, http://dblp.uni-trier.de},
      pdf = {2011/chp3A1010072F978364223716422.pdf}
    }
    
      
    Abstract The identification of cohesive segments in execution traces is an important step in concept location which, in turn, is of paramount importance for many program-comprehension activities. In this paper, we reformulate concept location as a trace segmentation problem solved via dynamic programming. Unlike approaches based on genetic algorithms, dynamic programming can compute an exact solution with better performance than previous approaches, even on long traces. We describe the new problem formulation and the algorithmic details of our approach. We then compare the performance of dynamic programming with that of a genetic algorithm, showing that dynamic programming dramatically reduces the time required to segment traces without sacrificing precision and recall, even slightly improving them.
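    Code sketch The entry above solves trace segmentation exactly with dynamic programming. The following minimal Python sketch shows the underlying recurrence under toy assumptions (a stand-in cohesion score based on the declaring class of each call, not the paper's objective function): the best split of a prefix into k segments extends the best split of a shorter prefix into k-1 segments.
      # Illustrative dynamic-programming trace segmentation (toy cohesion score).
      def cohesion(segment):
          # Fraction of calls sharing the most common declaring class in the segment.
          classes = [call.split('.')[0] for call in segment]
          return classes.count(max(set(classes), key=classes.count)) / len(segment)

      def segment_trace(trace, max_segments):
          n = len(trace)
          # best[k][i]: best total cohesion splitting trace[:i] into exactly k segments.
          best = [[float('-inf')] * (n + 1) for _ in range(max_segments + 1)]
          cut = [[0] * (n + 1) for _ in range(max_segments + 1)]
          best[0][0] = 0.0
          for k in range(1, max_segments + 1):
              for i in range(1, n + 1):
                  for j in range(k - 1, i):
                      score = best[k - 1][j] + cohesion(trace[j:i])
                      if score > best[k][i]:
                          best[k][i], cut[k][i] = score, j
          # Take the best number of segments, then backtrack the split points.
          k = max(range(1, max_segments + 1), key=lambda s: best[s][n])
          bounds, i = [], n
          while k > 0:
              bounds.append((cut[k][i], i))
              i, k = cut[k][i], k - 1
          return [trace[a:b] for a, b in reversed(bounds)]

      trace = ["Editor.open", "Editor.render", "Parser.parse", "Parser.tokenize", "Editor.close"]
      print(segment_trace(trace, max_segments=3))  # contiguous segments of the toy trace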
  33. [33]N. Ali, Y.-G. Guéhéneuc, and G. Antoniol, “Trust-Based Requirements Traceability,” in ICPC, 2011, pp. 111–120.
    Bibtex
      @inproceedings{05970169,
      author = {Ali, Nasir and Gu{\'e}h{\'e}neuc, Yann-Ga{\"e}l and Antoniol, Giuliano},
      title = {Trust-Based Requirements Traceability},
      booktitle = {ICPC},
      year = {2011},
      pages = {111-120},
      ee = {http://doi.ieeecomputersociety.org/10.1109/ICPC.2011.42},
      crossref = {DBLP:conf/iwpc/2011},
      bibsource = {DBLP, http://dblp.uni-trier.de},
      pdf = {2011/05970169.pdf}
    }
    
      
    Abstract Information retrieval (IR) approaches have proven useful in recovering traceability links between free-text documentation and source code. IR-based traceability recovery approaches produce ranked lists of traceability links between pieces of documentation and of source code. These traceability links are then pruned using various strategies and, finally, validated by human experts. In this paper we propose two contributions to improve the precision and recall of traceability links and, thus, reduce the manual validation effort required from human experts. First, we propose a novel approach, Trustrace, inspired by Web trust models to improve precision and recall of traceability links: Trustrace first uses any traceability recovery approach as the basis on which, second, it applies various experts’ opinions to add, remove, and/or adjust the rankings of the traceability links. The experts can be human experts or other traceability recovery approaches. Second, we propose a novel traceability recovery approach, Histrace, to identify traceability links between requirements and source code through CVS/SVN change logs using a Vector Space Model (VSM). We combine a traditional traceability recovery approach with Histrace to build Trustrace(VSM, Histrace), in which we use Histrace as one expert commenting on the traceability links recovered using the VSM-based approach. We apply Trustrace(VSM, Histrace) on two case studies to compare its traceability links with those recovered using only the VSM-based approach, in terms of precision and recall. We show that Trustrace(VSM, Histrace) improves, with statistical significance, the precision of the traceability links while also improving recall, but without statistical significance. We thus show that our trust-based approach indeed improves precision and recall and also that CVS/SVN change logs are useful in the traceability recovery process.
  34. [34]N. Ali, Y.-G. Guéhéneuc, and G. Antoniol, “Requirements Traceability for Object Oriented Systems by Partitioning Source Code,” in WCRE, 2011, pp. 45–54.
    Bibtex
      @inproceedings{06079774,
      author = {Ali, Nasir and Gu{\'e}h{\'e}neuc, Yann-Ga{\"e}l and Antoniol, Giuliano},
      title = {Requirements Traceability for Object Oriented Systems by Partitioning Source Code},
      booktitle = {WCRE},
      year = {2011},
      pages = {45-54},
      ee = {http://doi.ieeecomputersociety.org/10.1109/WCRE.2011.16},
      crossref = {DBLP:conf/wcre/2011},
      bibsource = {DBLP, http://dblp.uni-trier.de},
      pdf = {2011/06079774.pdf}
    }
    
      
    Abstract Requirements traceability ensures that source code is consistent with documentation and that all requirements have been implemented. During software evolution, as features are added, removed, or modified, the code drifts away from its original requirements. Traceability recovery approaches thus become necessary to re-establish the traceability relations between requirements and source code. This paper presents an approach (Coparvo) complementary to existing traceability recovery approaches for object-oriented programs. Coparvo reduces the false positive links recovered by traditional traceability recovery processes, thus reducing the manual validation effort. Coparvo assumes that information extracted from different entities (i.e., class names, comments, class variables, or method signatures) constitutes different information sources; these sources may have different levels of reliability in requirements traceability, and each information source may act as a different expert recommending traceability links. We applied Coparvo on three data sets, Pooka, SIP Communicator, and iTrust, to filter out false positive links recovered via an information retrieval approach, i.e., the vector space model. The results show that Coparvo significantly improves the accuracy of the recovered links and reduces by up to 83% the effort required to manually remove false positive links.
  35. [35]N. Bhattacharya, A. Sakti, G. Antoniol, Y.-G. Guéhéneuc, and G. Pesant, “Divide-by-Zero Exception Raising via Branch Coverage,” in SSBSE, 2011, pp. 204–218.
    Bibtex
      @inproceedings{chp3A1010072F978364223716419,
      author = {Bhattacharya, Neelesh and Sakti, Abdelilah and Antoniol, Giuliano and Gu{\'e}h{\'e}neuc, Yann-Ga{\"e}l and Pesant, Gilles},
      title = {Divide-by-Zero Exception Raising via Branch Coverage},
      booktitle = {SSBSE},
      year = {2011},
      pages = {204-218},
      ee = {http://dx.doi.org/10.1007/978-3-642-23716-4_19},
      crossref = {DBLP:conf/ssbse/2011},
      bibsource = {DBLP, http://dblp.uni-trier.de},
      pdf = {2011/chp3A1010072F978364223716419.pdf}
    }
    
      
    Abstract In this paper, we discuss how a search-based branch coverage approach can be used to design an effective test data generation approach, specifically targeting divide-by-zero exceptions. We first propose a novel testability transformation combining approach level and branch distance. We then use different search strategies, i.e., hill climbing, simulated annealing, and a genetic algorithm, to evaluate the performance of the novel testability transformation on a small synthetic example as well as on methods known to throw divide-by-zero exceptions, extracted from real world systems, namely Eclipse and Android. Finally, we also describe how test data generation for divide-by-zero exceptions can be formulated as a constraint programming problem and compare the resolution of this problem with a genetic algorithm in terms of execution time. We thus report evidence that the genetic algorithm using our novel testability transformation outperforms hill climbing, simulated annealing, and a previous approach (in terms of number of fitness evaluations) but is outperformed by constraint programming (in terms of execution time).
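
    The sketch below shows the general shape of a fitness that combines approach level with a normalized branch distance, steered towards making a divisor zero. The toy program under test and the constant K are assumptions for illustration, not the testability transformation defined in the paper.

      # Illustrative sketch only: a fitness combining approach level with a
      # normalized branch distance, aimed at making a divisor zero.  The toy
      # program under test and the constant K are assumptions for the example.
      K = 1.0  # offset used in the usual branch-distance formulation

      def normalize(distance):
          """Map a branch distance in [0, inf) into [0, 1)."""
          return distance / (distance + 1.0)

      def fitness(x, y):
          """Lower is better; 0 means the division is reached with a zero divisor.

          Toy program under test:
              if x > 10:
                  z = 100 / y   # raises ZeroDivisionError when y == 0
          """
          if x <= 10:
              approach_level = 1               # the guarding predicate is not yet satisfied
              branch_distance = (10 - x) + K   # how far x is from making `x > 10` true
          else:
              approach_level = 0
              branch_distance = abs(y)         # how far the divisor is from zero
          return approach_level + normalize(branch_distance)

      for candidate in [(3, 7), (11, 7), (11, 0)]:
          print(candidate, fitness(*candidate))
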
  36. [36]F. Jaafar, Y.-G. Guéhéneuc, S. Hamel, and G. Antoniol, “An Exploratory Study of Macro Co-changes,” in WCRE, 2011, pp. 325–334.
    Bibtex
      @inproceedings{06079858,
      author = {Jaafar, Fehmi and Gu{\'e}h{\'e}neuc, Yann-Ga{\"e}l and Hamel, Sylvie and Antoniol, Giuliano},
      title = {An Exploratory Study of Macro Co-changes},
      booktitle = {WCRE},
      year = {2011},
      pages = {325-334},
      ee = {http://doi.ieeecomputersociety.org/10.1109/WCRE.2011.47},
      crossref = {DBLP:conf/wcre/2011},
      bibsource = {DBLP, http://dblp.uni-trier.de},
      pdf = {2011/06079858.pdf}
    }
    
      
    Abstract The literature describes several approaches to identify the artefacts of programs that change together to reveal the (hidden) dependencies among these artefacts. These approaches analyse historical data, mined from version control systems, and report co-changing artefacts, which hint at the causes, consequences, and actors of the changes. We introduce the novel concepts of macro co-changes (MCC), i.e., of artefacts that co-change within a large time interval, and of dephase macro co-changes (DMCC), i.e., macro co-changes that always happen with the same shifts in time. We describe typical scenarios of MCC and DMCC and we use the Hamming distance to detect approximate occurrences of MCC and DMCC. We present our approach, Macocha, to identify these concepts in large programs. We apply Macocha and compare it, in terms of precision and recall, with UMLDiff (file stability) and association rules (co-changing files) on four systems: ArgoUML, FreeBSD, SIP, and XalanC. We also use external information to validate the (approximate) MCC and DMCC found by Macocha. We thus answer two research questions showing the existence and usefulness of these concepts and explaining scenarios of hidden dependencies among artefacts.
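
    The Hamming distance mentioned above can be illustrated on per-period change vectors: files whose vectors differ in at most a few positions are flagged as approximate co-changes. The change histories and the tolerance below are invented; this is not the Macocha tool.

      # Illustrative sketch only: flagging approximate co-changes by comparing
      # per-period change vectors with the Hamming distance; the change
      # histories and the tolerance are invented for this example.
      def hamming(u, v):
          return sum(a != b for a, b in zip(u, v))

      # 1 = the file changed in that time window, 0 = it did not.
      changes = {
          "Parser.java": [1, 0, 1, 1, 0, 1],
          "Lexer.java":  [1, 0, 1, 1, 0, 0],
          "README.md":   [0, 1, 0, 0, 1, 0],
      }

      TOLERANCE = 1  # allow one mismatching window (approximate co-change)
      files = list(changes)
      for i, f in enumerate(files):
          for g in files[i + 1:]:
              if hamming(changes[f], changes[g]) <= TOLERANCE:
                  print(f"{f} and {g} (approximately) co-change")
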
  37. [37]M. D. Penta, M. Harman, and G. Antoniol, “The use of search-based optimization techniques to schedule and staff software projects: an approach and an empirical study,” Softw., Pract. Exper., vol. 41, no. 5, pp. 495–519, 2011.
    Bibtex
      @article{1001_ftp,
      author = {Penta, Massimiliano Di and Harman, Mark and Antoniol, Giuliano},
      title = {The use of search-based optimization techniques to schedule and staff software projects: an approach and an empirical study},
      journal = {Softw., Pract. Exper.},
      volume = {41},
      number = {5},
      year = {2011},
      pages = {495-519},
      ee = {http://dx.doi.org/10.1002/spe.1001},
      bibsource = {DBLP, http://dblp.uni-trier.de},
      pdf = {2011/1001_ftp.pdf}
    }
    
      
    Abstract Allocating resources to a software project and assigning tasks to teams constitute crucial activities that affect project cost and completion time. Finding a solution for such a problem is NP-hard; this requires managers to be supported by proper tools in performing such an allocation. This paper shows how search-based optimization techniques can be combined with a queuing simulation model to address these problems. The obtained staff and task allocations aim to minimize the completion time and reduce schedule fragmentation. The proposed approach allows project managers to run multiple simulations, compare results and consider trade-offs between increasing the staffing level and anticipating the project completion date and between reducing the fragmentation and accepting project delays. The paper presents results from the application of the proposed search-based project planning approach to data obtained from two large scale commercial software maintenance projects.
  38. [38]A. Belderrar, S. Kpodjedo, Y.-G. Guéhéneuc, G. Antoniol, and P. Galinier, “Sub-graph Mining: Identifying Micro-architectures in Evolving Object-Oriented Software,” in CSMR, 2011, pp. 171–180.
    Bibtex
      @inproceedings{05741259,
      author = {Belderrar, Ahmed and Kpodjedo, Segla and Gu{\'e}h{\'e}neuc, Yann-Ga{\"e}l and Antoniol, Giuliano and Galinier, Philippe},
      title = {Sub-graph Mining: Identifying Micro-architectures in Evolving Object-Oriented Software},
      booktitle = {CSMR},
      year = {2011},
      pages = {171-180},
      ee = {http://dx.doi.org/10.1109/CSMR.2011.23},
      crossref = {DBLP:conf/csmr/2011},
      bibsource = {DBLP, http://dblp.uni-trier.de},
      pdf = {2011/05741259.pdf}
    }
    
      
    Abstract Developers introduce novel and undocumented micro-architectures when performing evolution tasks on object-oriented applications. We are interested in understanding whether those organizations of classes and relations can bear, much like cataloged design patterns and anti-patterns, potential harm or benefit to an object-oriented application. We present SGFinder, a sub-graph mining approach and tool based on an efficient enumeration technique to identify recurring micro-architectures in object-oriented class diagrams. Once SGFinder has detected instances of micro-architectures, we exploit these instances to identify their desirable properties, such as stability, or unwanted properties, such as change or fault proneness. We perform a feasibility study of our approach by applying SGFinder on the reverse-engineered class diagrams of several releases of two Java applications: ArgoUML and Rhino. We characterize and highlight some of the most interesting micro-architectures, e.g., the most fault prone and the most stable, and conclude that SGFinder opens the way to further interesting studies.
  39. [39]B. Dit, L. Guerrouj, D. Poshyvanyk, and G. Antoniol, “Can Better Identifier Splitting Techniques Help Feature Location?,” in ICPC, 2011, pp. 11–20.
    Bibtex
      @inproceedings{05970159,
      author = {Dit, Bogdan and Guerrouj, Latifa and Poshyvanyk, Denys and Antoniol, Giuliano},
      title = {Can Better Identifier Splitting Techniques Help Feature Location?},
      booktitle = {ICPC},
      year = {2011},
      pages = {11-20},
      ee = {http://doi.ieeecomputersociety.org/10.1109/ICPC.2011.47},
      crossref = {DBLP:conf/iwpc/2011},
      bibsource = {DBLP, http://dblp.uni-trier.de},
      pdf = {2011/05970159.pdf}
    }
    
      
    Abstract The paper presents an exploratory study of two feature location techniques utilizing three strategies for splitting identifiers: CamelCase, Samurai and manual splitting of identifiers. The main research question that we ask in this study is: if we had a perfect technique for splitting identifiers, would it still help improve the accuracy of feature location techniques applied in different scenarios and settings? In order to answer this research question we investigate two feature location techniques, one based on Information Retrieval and the other one based on the combination of Information Retrieval and dynamic analysis, for locating bugs and features using various configurations of preprocessing strategies on two open-source systems, Rhino and jEdit. The results of an extensive empirical evaluation reveal that feature location techniques using Information Retrieval can benefit from better preprocessing algorithms in some cases, and that their improvement in effectiveness while using manual splitting over state-of-the-art approaches is statistically significant in those cases. However, the results for the feature location technique using the combination of Information Retrieval and dynamic analysis do not show any improvement while using manual splitting, indicating that any preprocessing technique will suffice if execution data is available. Overall, our findings outline potential benefits of putting additional research efforts into defining more sophisticated source code preprocessing techniques as they can still be useful in situations where execution information cannot be easily collected.
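
    As a point of reference, a plain CamelCase splitter of the kind used as one of the splitting strategies can be sketched as below; it does not model abbreviation expansion the way Samurai or manual splitting do, and the example identifiers are invented.

      # Illustrative sketch only: a plain CamelCase/underscore splitter used as
      # a baseline strategy; the example identifiers are invented.
      import re

      def camel_case_split(identifier):
          parts = re.split(r"[_\W]+", identifier)   # split on underscores and separators
          words = []
          for part in parts:
              # Uppercase runs (acronyms), capitalized words, lowercase runs, digits.
              words.extend(re.findall(r"[A-Z]+(?![a-z])|[A-Z]?[a-z]+|\d+", part))
          return [w.lower() for w in words if w]

      print(camel_case_split("parseXMLFile"))   # ['parse', 'xml', 'file']
      print(camel_case_split("user_id2Name"))   # ['user', 'id', '2', 'name']
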
  40. [40]S. Hassaine, F. Boughanmi, Y.-G. Guéhéneuc, S. Hamel, and G. Antoniol, “Change Impact Analysis: An Earthquake Metaphor,” in ICPC, 2011, pp. 209–210.
    Bibtex
      @inproceedings{05970184,
      author = {Hassaine, Salima and Boughanmi, Ferdaous and Gu{\'e}h{\'e}neuc, Yann-Ga{\"e}l and Hamel, Sylvie and Antoniol, Giuliano},
      title = {Change Impact Analysis: An Earthquake Metaphor},
      booktitle = {ICPC},
      year = {2011},
      pages = {209-210},
      ee = {http://doi.ieeecomputersociety.org/10.1109/ICPC.2011.54},
      crossref = {DBLP:conf/iwpc/2011},
      bibsource = {DBLP, http://dblp.uni-trier.de},
      pdf = {2011/05970184.pdf}
    }
    
      
    Abstract Impact analysis is crucial to make decisions among different alternative implementations and to anticipate future maintenance tasks. Several approaches were proposed to identify software artefacts being affected by a change. However, to the best of our knowledge, none of these approaches have been used to study the scope of changes in a program. Yet, this information would help developers assess their change efforts and perform more adequate changes. Thus, we present a metaphor inspired by seismology and propose a mapping between the concepts of seismology and software evolution. We show the applicability and usefulness of our metaphor using Rhino and Xerces-J.
  41. [41]S. Kpodjedo, F. Ricca, P. Galinier, Y.-G. Guéhéneuc, and G. Antoniol, “Design evolution metrics for defect prediction in object oriented systems,” Empirical Software Engineering, vol. 16, no. 1, pp. 141–175, 2011.
    Bibtex
      @article{art3A1010072Fs1066401091517,
      author = {Kpodjedo, Segla and Ricca, Filippo and Galinier, Philippe and Gu{\'e}h{\'e}neuc, Yann-Ga{\"e}l and Antoniol, Giuliano},
      title = {Design evolution metrics for defect prediction in object oriented systems},
      journal = {Empirical Software Engineering},
      volume = {16},
      number = {1},
      year = {2011},
      pages = {141-175},
      ee = {http://dx.doi.org/10.1007/s10664-010-9151-7},
      bibsource = {DBLP, http://dblp.uni-trier.de},
      pdf = {2011/art3A1010072Fs1066401091517.pdf}
    }
    
      
    Abstract Testing is the most widely adopted practice to ensure software quality. However, this activity is often a compromise between the available resources and software quality. In object-oriented development, testing effort should be focused on defective classes. Unfortunately, identifying those classes is a challenging and difficult activity on which many metrics, techniques, and models have been tried. In this paper, we investigate the usefulness of elementary design evolution metrics to identify defective classes. The metrics include the numbers of added, deleted, and modified attributes, methods, and relations. The metrics are used to recommend a ranked list of classes likely to contain defects for a system. They are compared to Chidamber and Kemerer’s metrics on several versions of Rhino and of ArgoUML. Further comparison is conducted with the complexity metrics computed by Zimmermann et al. on several releases of Eclipse. The comparisons are made according to three criteria: presence of defects, number of defects, and defect density in the top-ranked classes. They show that the design evolution metrics, when used in conjunction with known metrics, improve the identification of defective classes. In addition, they show that the design evolution metrics make significantly better predictions of defect density than other metrics and, thus, can help in reducing the testing effort by focusing test activity on a reduced volume of code.
  42. [42]M. Abbes, F. Khomh, Y.-G. Guéhéneuc, and G. Antoniol, “An Empirical Study of the Impact of Two Antipatterns, Blob and Spaghetti Code, on Program Comprehension,” in CSMR, 2011, pp. 181–190.
    Bibtex
      @inproceedings{conf/csmr/AbbesKGA11,
      author = {Abbes, Marwen and Khomh, Foutse and Gu{\'e}h{\'e}neuc, Yann-Ga{\"e}l and Antoniol, Giuliano},
      title = {An Empirical Study of the Impact of Two Antipatterns, Blob and Spaghetti Code, on Program Comprehension},
      booktitle = {CSMR},
      year = {2011},
      pages = {181-190},
      ee = {http://dx.doi.org/10.1109/CSMR.2011.24},
      crossref = {DBLP:conf/csmr/2011},
      bibsource = {DBLP, http://dblp.uni-trier.de}
    }
    
      
  43. [43]D. Romano, M. D. Penta, and G. Antoniol, “An Approach for Search Based Testing of Null Pointer Exceptions,” in ICST, 2011, pp. 160–169.
    Bibtex
      @inproceedings{conf/icst/RomanoPA11,
      author = {Romano, Daniele and Penta, Massimiliano Di and Antoniol, Giuliano},
      title = {An Approach for Search Based Testing of Null Pointer Exceptions},
      booktitle = {ICST},
      year = {2011},
      pages = {160-169},
      ee = {http://dx.doi.org/10.1109/ICST.2011.49, http://doi.ieeecomputersociety.org/10.1109/ICST.2011.49},
      crossref = {DBLP:conf/icst/2011},
      bibsource = {DBLP, http://dblp.uni-trier.de}
    }
    
      
    Abstract Uncaught exceptions, and in particular null pointer exceptions (NPEs), constitute a major cause of crashes for software systems. Although tools for the static identification of potential NPEs exist, there is a need for approaches able to identify system execution scenarios causing NPEs. This paper proposes a search-based test data generation approach aimed at automatically identifying NPEs. The approach consists of two steps: (i) an inter-procedural data and control flow analysis, relying on existing technology, that identifies paths between input parameters and potential NPEs, and (ii) a genetic algorithm that evolves a population of test data with the aim of covering such paths. The algorithm is able to deal with complex inputs containing arbitrary data structures. The approach has been evaluated on test class clusters from six Java open source systems, where NPE bugs have been artificially introduced. Results show that the approach is, indeed, able to identify the NPE bugs, and it outperforms random testing. Also, we show how the approach is able to identify real NPE bugs, some of which are posted in the bug-tracking system of the Apache libraries.
  44. [44]S. Bouktif, F. Ahmed, I. Khalil, and G. Antoniol, “Corrigendum to ‘A novel composite model approach to improve software quality prediction’ [Information and Software Technology 52 (12) (2010) 1298-1311],” Information & Software Technology, vol. 53, no. 3, p. 291, 2011.
    Bibtex
      @article{journals/infsof/BouktifAKA11,
      author = {Bouktif, Salah and Ahmed, Faheem and Khalil, Issa and Antoniol, Giuliano},
      title = {Corrigendum to "A novel composite model approach to improve software quality prediction" [Information and Software Technology 52 (12) (2010) 1298-1311]},
      journal = {Information {\&} Software Technology},
      volume = {53},
      number = {3},
      year = {2011},
      pages = {291},
      ee = {http://dx.doi.org/10.1016/j.infsof.2010.12.004},
      bibsource = {DBLP, http://dblp.uni-trier.de}
    }
    
      
  45. [45]N. Ali, W. Wu, G. Antoniol, M. D. Penta, Y.-G. Guéhéneuc, and J. H. Hayes, “MoMS: Multi-objective miniaturization of software,” in ICSM, 2011, pp. 153–162.
    Bibtex
      @inproceedings{conf/icsm/AliWAPGH11,
      author = {Ali, Nasir and Wu, Wei and Antoniol, Giuliano and Penta, Massimiliano Di and Gu{\'e}h{\'e}neuc, Yann-Ga{\"e}l and Hayes, Jane Huffman},
      title = {MoMS: Multi-objective miniaturization of software},
      booktitle = {ICSM},
      year = {2011},
      pages = {153-162},
      ee = {http://dx.doi.org/10.1109/ICSM.2011.6080782},
      crossref = {DBLP:conf/icsm/2011},
      bibsource = {DBLP, http://dblp.uni-trier.de}
    }
    
      
  46. [46]L. M. Eshkevari, V. Arnaoudova, M. D. Penta, R. Oliveto, Y.-G. Guéhéneuc, and G. Antoniol, “An exploratory study of identifier renamings,” in MSR, 2011, pp. 33–42.
    Bibtex
      @inproceedings{conf/msr/EshkevariAPOGA11,
      author = {Eshkevari, Laleh Mousavi and Arnaoudova, Venera and Penta, Massimiliano Di and Oliveto, Rocco and Gu{\'e}h{\'e}neuc, Yann-Ga{\"e}l and Antoniol, Giuliano},
      title = {An exploratory study of identifier renamings},
      booktitle = {MSR},
      year = {2011},
      pages = {33-42},
      ee = {http://doi.acm.org/10.1145/1985441.1985449},
      crossref = {DBLP:conf/msr/2011},
      bibsource = {DBLP, http://dblp.uni-trier.de}
    }
    
      
  47. [47]S. Kpodjedo, P. Galinier, and G. Antoniol, “On the use of similarity metrics for approximate graph matching,” Electronic Notes in Discrete Mathematics, vol. 36, pp. 687–694, 2010.
    Bibtex
      @article{journals/endm/KpodjedoGA10,
      author = {Kpodjedo, Segla and Galinier, Philippe and Antoniol, Giuliano},
      title = {On the use of similarity metrics for approximate graph matching},
      journal = {Electronic Notes in Discrete Mathematics},
      volume = {36},
      year = {2010},
      pages = {687-694},
      ee = {http://dx.doi.org/10.1016/j.endm.2010.05.087},
      bibsource = {DBLP, http://dblp.uni-trier.de}
    }
    
      
  48. [48]S. Kpodjedo, P. Galinier, and G. Antoniol, “Enhancing a Tabu Algorithm for Approximate Graph Matching by Using Similarity Measures,” in EvoCOP, 2010, pp. 119–130.
    Bibtex
      @inproceedings{conf/evoW/KpodjedoGA10,
      author = {Kpodjedo, Segla and Galinier, Philippe and Antoniol, Giuliano},
      title = {Enhancing a Tabu Algorithm for Approximate Graph Matching by Using Similarity Measures},
      booktitle = {EvoCOP},
      year = {2010},
      pages = {119-130},
      ee = {http://dx.doi.org/10.1007/978-3-642-12139-5_11},
      crossref = {DBLP:conf/evoW/2010cop},
      bibsource = {DBLP, http://dblp.uni-trier.de}
    }
    
      
    Abstract In this paper, we investigate heuristics to solve the Approximate Graph Matching (AGM) problem. We propose a tabu search algorithm which exploits a simple neighborhood but is initialized by a greedy procedure which uses a measure of similarity between the vertices of the two graphs. The algorithm is tested on a large collection of graphs of various sizes (from 300 up to 3000 vertices) and densities. Computing times range from less than 1 second up to a few minutes. The algorithm consistently obtains very good results, especially on labeled graphs. The results obtained by the tabu algorithm alone (without the greedy procedure) were very poor, illustrating the importance of using vertex similarity during the early steps of the search process.
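
    The role of vertex similarity in seeding the search can be illustrated with a greedy matching built from pairwise similarities, the kind of initial solution a tabu search could then refine. The similarity measure (shared labels plus degree closeness) and the toy graphs are assumptions, not the paper's definitions.

      # Illustrative sketch only: greedily seeding a vertex-to-vertex matching
      # from a similarity measure (shared labels plus degree closeness), the
      # kind of initial solution a tabu search could refine.  The measure and
      # the toy graphs are assumptions, not the paper's definitions.
      def similarity(g1, g2, u, v):
          label_bonus = 1.0 if g1["labels"][u] == g2["labels"][v] else 0.0
          degree_gap = abs(len(g1["adj"][u]) - len(g2["adj"][v]))
          return label_bonus + 1.0 / (1.0 + degree_gap)

      def greedy_match(g1, g2):
          pairs = sorted(((similarity(g1, g2, u, v), u, v)
                          for u in g1["adj"] for v in g2["adj"]), reverse=True)
          matched_u, matched_v, matching = set(), set(), {}
          for _, u, v in pairs:
              if u not in matched_u and v not in matched_v:
                  matching[u] = v
                  matched_u.add(u)
                  matched_v.add(v)
          return matching

      g1 = {"labels": {"a": "class", "b": "class"}, "adj": {"a": ["b"], "b": ["a"]}}
      g2 = {"labels": {"x": "class", "y": "iface"}, "adj": {"x": ["y"], "y": ["x"]}}
      print(greedy_match(g1, g2))
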
  49. [49]N. Haderer, F. Khomh, and G. Antoniol, “SQUANER: A framework for monitoring the quality of software systems,” in ICSM, 2010, pp. 1–4.
    Bibtex
      @inproceedings{05609684,
      author = {Haderer, Nicolas and Khomh, Foutse and Antoniol, Giuliano},
      title = {SQUANER: A framework for monitoring the quality of software systems},
      booktitle = {ICSM},
      year = {2010},
      pages = {1-4},
      ee = {http://dx.doi.org/10.1109/ICSM.2010.5609684},
      crossref = {DBLP:conf/icsm/2010},
      bibsource = {DBLP, http://dblp.uni-trier.de},
      pdf = {2010/05609684.pdf}
    }
    
      
    Abstract Despite the large number of quality models and publicly available quality assessment tools like PMD, Checkstyle, or FindBugs, very few studies have investigated the use of quality models by developers in their daily activities. One reason for this lack of studies is the absence of integrated environments for monitoring the evolution of software quality. We propose SQUANER (Software QUality ANalyzER), a framework for monitoring the evolution of the quality of object-oriented systems. SQUANER connects directly to the SVN repository of a system, extracts the source code, and performs quality evaluations and fault predictions every time a commit is made by a developer. After quality analysis, feedback is provided to developers with instructions on how to improve their code.
  50. [50]R. Oliveto, F. Khomh, G. Antoniol, and Y.-G. Guéhéneuc, “Numerical Signatures of Antipatterns: An Approach Based on B-Splines,” in CSMR, 2010, pp. 248–251.
    Bibtex
      @inproceedings{05714444,
      author = {Oliveto, Rocco and Khomh, Foutse and Antoniol, Giuliano and Gu{\'e}h{\'e}neuc, Yann-Ga{\"e}l},
      title = {Numerical Signatures of Antipatterns: An Approach Based on B-Splines},
      booktitle = {CSMR},
      year = {2010},
      pages = {248-251},
      ee = {http://dx.doi.org/10.1109/CSMR.2010.47},
      crossref = {DBLP:conf/csmr/2010},
      bibsource = {DBLP, http://dblp.uni-trier.de},
      pdf = {2010/05714444.pdf}
    }
    
      
    Abstract Antipatterns are poor object-oriented solutions to recurring design problems. The identification of occurrences of antipatterns in systems has received recently some attention but current approaches have two main limitations: either (1) they classify classes strictly as being or not antipatterns, and thus cannot report accurate information for borderline classes, or (2) they return the probabilities of classes to be antipatterns but they require an expensive tuning by experts to have acceptable accuracy. To mitigate such limitations, we introduce a new identification approach, ABS (Antipattern identification using B-Splines), based on a similarity computed via a numerical analysis technique using B-splines. We illustrate our approach on the Blob and compare it with DECOR, which uses strict thresholds, and with another approach based on Bayesian Beliefs Networks. We show that our approach generally outperforms previous approaches in terms of accuracy.
  51. [51]F. Asadi, M. D. Penta, G. Antoniol, and Y.-G. Guéhéneuc, “A Heuristic-Based Approach to Identify Concepts in Execution Traces,” in CSMR, 2010, pp. 31–40.
    Bibtex
      @inproceedings{05714415,
      author = {Asadi, Fatemeh and Penta, Massimiliano Di and Antoniol, Giuliano and Gu{\'e}h{\'e}neuc, Yann-Ga{\"e}l},
      title = {A Heuristic-Based Approach to Identify Concepts in Execution Traces},
      booktitle = {CSMR},
      year = {2010},
      pages = {31-40},
      ee = {http://dx.doi.org/10.1109/CSMR.2010.17},
      crossref = {DBLP:conf/csmr/2010},
      bibsource = {DBLP, http://dblp.uni-trier.de},
      pdf = {2010/05714415.pdf}
    }
    
      
    Abstract Concept or feature identification, i.e., the identification of the source code fragments implementing a particular feature, is a crucial task during software understanding and maintenance. This paper proposes an approach to identify concepts in execution traces by finding cohesive and decoupled fragments of the traces. The approach relies on search-based optimization techniques, textual analysis of the system source code using latent semantic indexing, and trace compression techniques. It is evaluated to identify features from execution traces of two open source systems from different domains, JHotDraw and ArgoUML. Results show that the approach is always able to identify trace segments implementing concepts with a high precision and, for highly cohesive concepts, with a high overlap with the manually-built oracle.
  52. [52]N. Madani, L. Guerrouj, M. D. Penta, Y.-G. Guéhéneuc, and G. Antoniol, “Recognizing Words from Source Code Identifiers Using Speech Recognition Techniques,” in CSMR, 2010, pp. 68–77.
    Bibtex
      @inproceedings{conf/csmr/MadaniGPGA10,
      author = {Madani, Nioosha and Guerrouj, Latifa and Penta, Massimiliano Di and Gu{\'e}h{\'e}neuc, Yann-Ga{\"e}l and Antoniol, Giuliano},
      title = {Recognizing Words from Source Code Identifiers Using Speech Recognition Techniques},
      booktitle = {CSMR},
      year = {2010},
      pages = {68-77},
      ee = {http://dx.doi.org/10.1109/CSMR.2010.31},
      crossref = {DBLP:conf/csmr/2010},
      bibsource = {DBLP, http://dblp.uni-trier.de}
    }
    
      
    Abstract The existing software engineering literature has empirically shown that a proper choice of identifiers influences software understandability and maintainability. Researchers have noticed that identifiers are one of the most important sources of information about program entities and that the semantics of identifier components guide the cognitive process. Recognizing the words forming identifiers is not an easy task when naming conventions (e.g., Camel Case) are not used or strictly followed and/or when these words have been abbreviated or otherwise transformed. This paper proposes a technique inspired by speech recognition, dynamic time warping, to split identifiers into component words. The proposed technique has been applied to identifiers extracted from two different applications: JHotDraw and Lynx. Results compared with manually-built oracles and with the Camel Case split are encouraging. In fact, they show that the technique successfully recognizes the words composing identifiers (even when abbreviated) in about 90% of cases and that it performs better than Camel Case. Furthermore, it was even able to spot mistakes in the manually built oracle.
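
    A character-level dynamic time warping distance of the kind alluded to above can be sketched as follows, here used to rank dictionary words against an abbreviated identifier fragment. The cost scheme, the length normalization, and the tiny dictionary are assumptions for illustration, not the paper's splitter.

      # Illustrative sketch only: character-level DTW distance between strings.
      def dtw(a, b, mismatch=1.0):
          n, m = len(a), len(b)
          d = [[float("inf")] * (m + 1) for _ in range(n + 1)]
          d[0][0] = 0.0
          for i in range(1, n + 1):
              for j in range(1, m + 1):
                  cost = 0.0 if a[i - 1] == b[j - 1] else mismatch
                  d[i][j] = cost + min(d[i - 1][j],      # stay on b[j], advance a
                                       d[i][j - 1],      # stay on a[i], advance b
                                       d[i - 1][j - 1])  # advance both
          return d[n][m]

      dictionary = ["pointer", "counter", "print"]
      fragment = "ptr"
      # Normalizing by word length favors full words that the fragment abbreviates.
      best = min(dictionary, key=lambda word: dtw(fragment, word) / len(word))
      print(best)  # -> 'pointer'
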
  53. [53]V. Arnaoudova, L. M. Eshkevari, R. Oliveto, Y.-G. Guéhéneuc, and G. Antoniol, “Physical and conceptual identifier dispersion: Measures and relation to fault proneness,” in ICSM, 2010, pp. 1–5.
    Bibtex
      @inproceedings{conf/icsm/ArnaoudovaEOGA10,
      author = {Arnaoudova, Venera and Eshkevari, Laleh Mousavi and Oliveto, Rocco and Gu{\'e}h{\'e}neuc, Yann-Ga{\"e}l and Antoniol, Giuliano},
      title = {Physical and conceptual identifier dispersion: Measures and relation to fault proneness},
      booktitle = {ICSM},
      year = {2010},
      pages = {1-5},
      ee = {http://dx.doi.org/10.1109/ICSM.2010.5609748},
      crossref = {DBLP:conf/icsm/2010},
      bibsource = {DBLP, http://dblp.uni-trier.de}
    }
    
      
    Abstract Poorly-chosen identifiers have been reported in the literature as misleading and increasing the program comprehension effort. Identifiers are composed of terms, which can be dictionary words, acronyms, contractions, or simple strings. We conjecture that the use of identical terms in different contexts may increase the risk of faults. We investigate our conjecture using a measure combining term entropy and term context-coverage to study whether certain terms increase the odds ratios of methods to be fault-prone. We compute term entropy and context-coverage in Rhino v1.4R3 and ArgoUML v0.16, and we show statistically that methods and attributes containing terms with high entropy and context-coverage are more fault-prone.
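
    The term entropy and context-coverage measures can be illustrated with a small sketch over the methods in which each term occurs. The term/method data and the context-coverage proxy below are invented and only approximate the measures defined in the paper.

      # Illustrative sketch only: per-term entropy over the methods in which a
      # term occurs, plus a simple context-coverage proxy; the data and the
      # exact definitions are invented for this example.
      import math
      from collections import Counter

      # term -> methods in which it appears (one entry per occurrence)
      occurrences = {
          "data": ["parse", "parse", "save", "load", "render"],
          "xml":  ["parse", "parse", "parse"],
      }
      all_methods = {"parse", "save", "load", "render"}

      def term_entropy(methods):
          counts = Counter(methods)
          total = sum(counts.values())
          return -sum((c / total) * math.log2(c / total) for c in counts.values())

      def context_coverage(methods, universe):
          return len(set(methods)) / len(universe)

      for term, methods in occurrences.items():
          print(term,
                round(term_entropy(methods), 2),
                round(context_coverage(methods, all_methods), 2))
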
  54. [54]J. K.-Y. Ng, Y.-G. Guéhéneuc, and G. Antoniol, “Identification of behavioural and creational design motifs through dynamic analysis,” Journal of Software Maintenance, vol. 22, no. 8, pp. 597–627, 2010.
    Bibtex
      @article{journals/smr/NgGA10,
      author = {Ng, Janice Ka-Yee and Gu{\'e}h{\'e}neuc, Yann-Ga{\"e}l and Antoniol, Giuliano},
      title = {Identification of behavioural and creational design motifs through dynamic analysis},
      journal = {Journal of Software Maintenance},
      volume = {22},
      number = {8},
      year = {2010},
      pages = {597-627},
      ee = {http://dx.doi.org/10.1002/smr.421},
      bibsource = {DBLP, http://dblp.uni-trier.de}
    }
    
      
    Abstract Design patterns offer design motifs, solutions to object-oriented design problems. Design motifs lead to well-structured designs and thus are believed to ease software maintenance. However, after use, they are often ‘lost’ and are consequently of little help during program comprehension and other maintenance activities. Therefore, several works proposed design pattern identification approaches to recover occurrences of the motifs. These approaches mainly used the structure and organization of classes as input. Consequently, they have a low precision when considering behavioural and creational motifs, which pertain to the assignment of responsibilities and the collaborations among objects at runtime. We propose MoDeC, an approach to describe behavioural and creational motifs as collaborations among objects in the form of scenario diagrams. We identify these motifs using dynamic analysis and constraint programming. Using a proof-of-concept implementation of MoDeC and different scenarios for five other Java programs and Builder, Command, and Visitor, we show that MoDeC has a better precision than the state-of-the-art static approaches.
  55. [55]G. Bavota, R. Oliveto, A. D. Lucia, G. Antoniol, and Y.-G. Guéhéneuc, “Playing with refactoring: Identifying extract class opportunities through game theory,” in ICSM, 2010, pp. 1–5.
    Bibtex
      @inproceedings{05609739,
      author = {Bavota, Gabriele and Oliveto, Rocco and Lucia, Andrea De and Antoniol, Giuliano and Gu{\'e}h{\'e}neuc, Yann-Ga{\"e}l},
      title = {Playing with refactoring: Identifying extract class opportunities through game theory},
      booktitle = {ICSM},
      year = {2010},
      pages = {1-5},
      ee = {http://dx.doi.org/10.1109/ICSM.2010.5609739},
      crossref = {DBLP:conf/icsm/2010},
      bibsource = {DBLP, http://dblp.uni-trier.de},
      pdf = {2010/05609739.pdf}
    }
    
      
    Abstract In software engineering, developers must often find solutions to problems balancing competing goals, e.g., quality versus cost, time to market versus resources, or cohesion versus coupling. Finding a suitable balance between contrasting goals is often complex, and recommendation systems are useful to support developers and managers in performing such a complex task. We believe that contrasting goals can often be dealt with using game theory techniques. Indeed, game theory is successfully used in other fields, especially in economics, to mathematically propose solutions to strategic situations in which an individual’s success in making choices depends on the choices of others. To demonstrate the applicability of game theory to software engineering and to understand its pros and cons, we propose an approach based on game theory that recommends extract-class refactoring opportunities. A preliminary evaluation inspired by mutation testing demonstrates the applicability and the benefits of the proposed approach.
  56. [56]S. Bouktif, F. Ahmed, I. Khalil, and G. Antoniol, “A novel composite model approach to improve software quality prediction,” Information & Software Technology, vol. 52, no. 12, pp. 1298–1311, 2010.
    Bibtex
      @article{journals/infsof/BouktifAKA10,
      author = {Bouktif, Salah and Ahmed, Faheem and Khalil, Issa and Antoniol, Giuliano},
      title = {A novel composite model approach to improve software quality prediction},
      journal = {Information {\&} Software Technology},
      volume = {52},
      number = {12},
      year = {2010},
      pages = {1298-1311},
      ee = {http://dx.doi.org/10.1016/j.infsof.2010.07.003},
      bibsource = {DBLP, http://dblp.uni-trier.de}
    }
    
      
  57. [57]W. Wu, Y.-G. Guéhéneuc, G. Antoniol, and M. Kim, “AURA: a hybrid approach to identify framework evolution,” in ICSE (1), 2010, pp. 325–334.
    Bibtex
      @inproceedings{p325-wu,
      author = {Wu, Wei and Gu{\'e}h{\'e}neuc, Yann-Ga{\"e}l and Antoniol, Giuliano and Kim, Miryung},
      title = {AURA: a hybrid approach to identify framework evolution},
      booktitle = {ICSE (1)},
      year = {2010},
      pages = {325-334},
      ee = {http://doi.acm.org/10.1145/1806799.1806848},
      crossref = {DBLP:conf/icse/2010-1},
      bibsource = {DBLP, http://dblp.uni-trier.de},
      pdf = {2010/p325-wu.pdf}
    }
    
      
    Abstract Software frameworks and libraries are indispensable to today’s software systems. As they evolve, it is often time-consuming for developers to keep their code up-to-date, so approaches have been proposed to facilitate this. Usually, these approaches cannot automatically identify change rules for one-replaced-by-many and many-replaced-by-one methods, and they trade off recall for higher precision using one or more experimentally-evaluated thresholds. We introduce AURA, a novel hybrid approach that combines call dependency and text similarity analyses to overcome these limitations. We implement it in a Java system and compare it on five frameworks with three previous approaches by Dagenais and Robillard, M. Kim et al., and Schäfer et al. The comparison shows that, on average, the recall of AURA is 53.07% higher while its precision is similar, e.g., 0.10% lower.
  58. [58]M. D. Penta, D. M. Germán, Y.-G. Guéhéneuc, and G. Antoniol, “An exploratory study of the evolution of software licensing,” in ICSE (1), 2010, pp. 145–154.
    Bibtex
      @inproceedings{p145-di_penta,
      author = {Penta, Massimiliano Di and Germ{\'a}n, Daniel M. and Gu{\'e}h{\'e}neuc, Yann-Ga{\"e}l and Antoniol, Giuliano},
      title = {An exploratory study of the evolution of software licensing},
      booktitle = {ICSE (1)},
      year = {2010},
      pages = {145-154},
      ee = {http://doi.acm.org/10.1145/1806799.1806824},
      crossref = {DBLP:conf/icse/2010-1},
      bibsource = {DBLP, http://dblp.uni-trier.de},
      pdf = {2010/p145-di_penta.pdf}
    }
    
      
    Abstract Free and open source software (FOSS) is distributed and made available to users under different software licenses, mentioned in FOSS code by means of licensing statements. Various factors, such as changes in the legal landscape, commercial code licensed as FOSS, or code reused from other FOSS systems, lead to evolution of licensing, which may affect the way a system or part of it can be subsequently used. Therefore, it is crucial to monitor licensing evolution. However, manually tracking the licensing evolution of thousands of files is a daunting task. After presenting several cases about the effects of licensing evolution, we argue that developers and system integrators must monitor licensing evolution and they need an automatic approach due to the sheer size of FOSS. We propose an approach to automatically track changes occurring in the licensing terms of a system and report an empirical study of the licensing evolution of six different FOSS systems. Results show that licensing underwent frequent and substantial changes.
  59. [59]M. D. Penta, D. M. Germán, and G. Antoniol, “Identifying licensing of jar archives using a code-search approach,” in MSR, 2010, pp. 151–160.
    Bibtex
      @inproceedings{05463282,
      author = {Penta, Massimiliano Di and Germ{\'a}n, Daniel M. and Antoniol, Giuliano},
      title = {Identifying licensing of jar archives using a code-search approach},
      booktitle = {MSR},
      year = {2010},
      pages = {151-160},
      ee = {http://dx.doi.org/10.1109/MSR.2010.5463282},
      crossref = {DBLP:conf/msr/2010},
      bibsource = {DBLP, http://dblp.uni-trier.de},
      pdf = {2010/05463282.pdf}
    }
    
      
    Abstract Free and open source software strongly promotes the reuse of source code. Some open source Java components/libraries are distributed as jar archives only containing the bytecode and some additional information. For anyone wanting to integrate such a jar in her own project, it is important to determine the license(s) of the code from which the jar archive was produced, as this affects the way that such a component can be used. This paper proposes an automatic approach to determine the license of jar archives, combining the use of a code-search engine with the automatic classification of licenses contained in textual files enclosed in the jar. Results of an empirical study performed on 37 jars - from 17 different systems - indicate that this approach is able to successfully infer the jar licenses in over 95% of the cases, but that in many cases the license in textual files may differ from the one of the classes contained in the jar.
  60. [60]Z. Awedikian, K. Ayari, and G. Antoniol, “MC/DC automatic test input data generation,” in GECCO, 2009, pp. 1657–1664.
    Bibtex
      @inproceedings{conf/gecco/AwedikianAA09,
      author = {Awedikian, Zeina and Ayari, Kamel and Antoniol, Giuliano},
      title = {MC/DC automatic test input data generation},
      booktitle = {GECCO},
      year = {2009},
      pages = {1657-1664},
      ee = {http://doi.acm.org/10.1145/1569901.1570123},
      crossref = {DBLP:conf/gecco/2009g},
      bibsource = {DBLP, http://dblp.uni-trier.de}
    }
    
      
    Abstract In regulated domains such as aerospace and in safety-critical domains, software quality assurance is subject to strict regulations such as the RTCA DO-178B standard. Among other conditions, the DO-178B mandates the satisfaction of the modified condition/decision coverage (MC/DC) testing criterion for software where failure conditions may have catastrophic consequences. MC/DC is a white box testing criterion aiming at proving that all conditions involved in a predicate can influence the predicate value in the desired way. In this paper, we propose a novel fitness function inspired by chaining test data generation to efficiently generate test input data satisfying the MC/DC criterion. Preliminary results show the superiority of the novel fitness function, which is able to avoid the plateaus that lead traditional white box fitness functions to a behavior close to random testing.
  61. [61]S. Gueorguiev, M. Harman, and G. Antoniol, “Software project planning for robustness and completion time in the presence of uncertainty using multi objective search based software engineering,” in GECCO, 2009, pp. 1673–1680.
    Bibtex
      @inproceedings{p1673-gueorguiev,
      author = {Gueorguiev, Stefan and Harman, Mark and Antoniol, Giuliano},
      title = {Software project planning for robustness and completion time in the presence of uncertainty using multi objective search based software engineering},
      booktitle = {GECCO},
      year = {2009},
      pages = {1673-1680},
      ee = {http://doi.acm.org/10.1145/1569901.1570125},
      crossref = {DBLP:conf/gecco/2009g},
      bibsource = {DBLP, http://dblp.uni-trier.de},
      pdf = {2009/p1673-gueorguiev.pdf}
    }
    
      
    Abstract All large-scale projects contain a degree of risk and uncertainty. Software projects are particularly vulnerable to overruns, due to this uncertainty and the inherent difficulty of software project cost estimation. In this paper we introduce a search based approach to software project robustness. The approach is to formulate this problem as a multi objective Search Based Software Engineering problem, in which robustness and completion time are treated as two competing objectives. The paper presents the results of the application of this new approach to four large real-world software projects, using two different models of uncertainty.
  62. [62]G. Antoniol, R. Oliveto, and D. Poshyvanyk, “5th international workshop on Traceability in Emerging Forms of Software Engineering (TEFSE 2009),” in ICSE Companion, 2009, pp. 472–473.
    Bibtex
      @inproceedings{conf/icse/AntoniolOP09,
      author = {Antoniol, Giuliano and Oliveto, Rocco and Poshyvanyk, Denys},
      title = {5$^{\mbox{th}}$ international workshop on Traceability in Emerging Forms of Software Engineering (TEFSE 2009)},
      booktitle = {ICSE Companion},
      year = {2009},
      pages = {472-473},
      ee = {http://dx.doi.org/10.1109/ICSE-COMPANION.2009.5071068},
      crossref = {DBLP:conf/icse/2009c},
      bibsource = {DBLP, http://dblp.uni-trier.de}
    }
    
      
  63. [63]A. Zaidman, G. Antoniol, and S. Ducasse, Eds., 16th Working Conference on Reverse Engineering, WCRE 2009, 13-16 October 2009, Lille, France. IEEE Computer Society, 2009.
    Bibtex
      @proceedings{conf/wcre/2009,
      editor = {Zaidman, Andy and Antoniol, Giuliano and Ducasse, St{\'e}phane},
      title = {16th Working Conference on Reverse Engineering, WCRE 2009, 13-16 October 2009, Lille, France},
      booktitle = {WCRE},
      publisher = {IEEE Computer Society},
      year = {2009},
      isbn = {978-0-7695-3867-9},
      bibsource = {DBLP, http://dblp.uni-trier.de}
    }
    
      
  64. [64]F. Khomh, Y.-G. Guéhéneuc, and G. Antoniol, “Playing roles in design patterns: An empirical descriptive and analytic study,” in ICSM, 2009, pp. 83–92.
    Bibtex
      @inproceedings{conf/icsm/KhomhGA09,
      author = {Khomh, Foutse and Gu{\'e}h{\'e}neuc, Yann-Ga{\"e}l and Antoniol, Giuliano},
      title = {Playing roles in design patterns: An empirical descriptive and analytic study},
      booktitle = {ICSM},
      year = {2009},
      pages = {83-92},
      ee = {http://dx.doi.org/10.1109/ICSM.2009.5306327},
      crossref = {DBLP:conf/icsm/2009},
      bibsource = {DBLP, http://dblp.uni-trier.de}
    }
    
      
    Abstract This work presents a descriptive and analytic study of classes playing zero, one, or two roles in six different design patterns (and combinations thereof). First, we answer three research questions showing that (1) classes playing one or two roles do exist in programs and are not negligible and that there are significant differences among the (2) internal (class metrics) and (3) external (change-proneness) characteristics of classes playing zero, one, or two roles. Second, we revisit a previous work on design patterns and changeability and show that its results were, in a great part, due to classes playing two roles. Third, we exemplify the use of the study results to provide a ranking of the occurrences of the design patterns identified in a program. The ranking allows developers to balance precision and recall.
  65. [65]D. M. Germán, M. D. Penta, Y.-G. Guéhéneuc, and G. Antoniol, “Code siblings: Technical and legal implications of copying code between applications,” in MSR, 2009, pp. 81–90.
    Bibtex
      @inproceedings{conf/msr/GermanPGA09,
      author = {Germ{\'a}n, Daniel M. and Penta, Massimiliano Di and Gu{\'e}h{\'e}neuc, Yann-Ga{\"e}l and Antoniol, Giuliano},
      title = {Code siblings: Technical and legal implications of copying code between applications},
      booktitle = {MSR},
      year = {2009},
      pages = {81-90},
      ee = {http://dx.doi.org/10.1109/MSR.2009.5069483},
      crossref = {DBLP:conf/msr/2009},
      bibsource = {DBLP, http://dblp.uni-trier.de}
    }
    
      
    Abstract Source code cloning does not happen within a single system only. It can also occur between one system and another. We use the term code sibling to refer to a code clone that evolves in a different system than the code from which it originates. Code siblings can only occur when the source code copyright owner allows it and when the conditions imposed by such a license are not incompatible with the license of the destination system. In some situations, copying source code fragments is allowed—legally—in one direction, but not in the other. In this paper, we use clone detection, license mining and classification, and change history techniques to understand how code siblings—under different licenses—flow in one direction or the other between Linux and two BSD Unixes, FreeBSD and OpenBSD. Our results show that, in most cases, this migration appears to happen according to the terms of the license of the original code being copied, always favoring copying from less restrictive licenses towards more restrictive ones. We also discovered that sometimes code is inserted into the kernels from an outside source.
  66. [66]S. L. Abebe, S. Haiduc, A. Marcus, P. Tonella, and G. Antoniol, “Analyzing the Evolution of the Source Code Vocabulary,” in CSMR, 2009, pp. 189–198.
    Bibtex
      @inproceedings{04812752,
      author = {Abebe, Surafel Lemma and Haiduc, Sonia and Marcus, Andrian and Tonella, Paolo and Antoniol, Giuliano},
      title = {Analyzing the Evolution of the Source Code Vocabulary},
      booktitle = {CSMR},
      year = {2009},
      pages = {189-198},
      ee = {http://dx.doi.org/10.1109/CSMR.2009.61},
      crossref = {DBLP:conf/csmr/2009},
      bibsource = {DBLP, http://dblp.uni-trier.de},
      pdf = {2009/04812752.pdf}
    }
    
      
    Abstract Source code is a mixed software artifact, containing information for both the compiler and the developers. While programming language grammar dictates how the source code is written, developers have a lot of freedom in writing identifiers and comments. These are intentional in nature and become means of communication between developers. The goal of this paper is to analyze how the source code vocabulary changes during evolution, through an exploratory study of two software systems. Specifically, we collected data to answer a set of questions about the vocabulary evolution, such as: How does the size of the source code vocabulary evolve over time? What do most frequent terms refer to? Are new identifiers introducing new terms? Are there terms shared between different types of identifiers and comments? Are new and deleted terms in a type of identifiers mirrored in other types of identifiers or in comments?
  67. [67]S. Kpodjedo, F. Ricca, P. Galinier, and G. Antoniol, “Recovering the Evolution Stable Part Using an ECGM Algorithm: Is There a Tunnel in Mozilla?,” in CSMR, 2009, pp. 179–188.
    Bibtex
      @inproceedings{04812751,
      author = {Kpodjedo, Segla and Ricca, Filippo and Galinier, Philippe and Antoniol, Giuliano},
      title = {Recovering the Evolution Stable Part Using an ECGM Algorithm: Is There a Tunnel in Mozilla?},
      booktitle = {CSMR},
      year = {2009},
      pages = {179-188},
      ee = {http://dx.doi.org/10.1109/CSMR.2009.24},
      crossref = {DBLP:conf/csmr/2009},
      bibsource = {DBLP, http://dblp.uni-trier.de},
      pdf = {2009/04812751.pdf}
    }
    
      
    Abstract Analyzing the evolutionary history of the design of Object-Oriented Software is an important and difficult task where matching algorithms play a fundamental role. In this paper, we investigate the applicability of an error-correcting graph matching (ECGM) algorithm to object-oriented software evolution. By means of a case study, we report evidence of ECGM applicability in studying the Mozilla class diagram evolution. We collected 144 Mozilla snapshots over the past six years, reverse-engineered class diagrams and recovered traceability links between subsequent class diagrams. Our algorithm allows us to identify evolving classes that maintain a stable structure of relations (associations, inheritances and aggregations) with other classes and thus likely constitute the backbone of Mozilla.
  68. [68]G. Antoniol, “Keynote Paper: Search Based Software Testing for Software Security: Breaking Code to Make it Safer,” in ICST Workshops, 2009, pp. 87–100.
    Bibtex
      @inproceedings{conf/icst/Antoniol09,
      author = {Antoniol, Giuliano},
      title = {Keynote Paper: Search Based Software Testing for Software Security: Breaking Code to Make it Safer},
      booktitle = {ICST Workshops},
      year = {2009},
      pages = {87-100},
      ee = {http://dx.doi.org/10.1109/ICSTW.2009.12, http://doi.ieeecomputersociety.org/10.1109/ICSTW.2009.12},
      crossref = {DBLP:conf/icst/2009w},
      bibsource = {DBLP, http://dblp.uni-trier.de}
    }
    
      
  69. [69]G. Antoniol, J. H. Hayes, Y.-G. Guéhéneuc, and M. D. Penta, “Reuse or rewrite: Combining textual, static, and dynamic analyses to assess the cost of keeping a system up-to-date,” in ICSM, 2008, pp. 147–156.
    Bibtex
      @inproceedings{conf/icsm/AntoniolHGP08,
      author = {Antoniol, Giuliano and Hayes, Jane Huffman and Gu{\'e}h{\'e}neuc, Yann-Ga{\"e}l and Penta, Massimiliano Di},
      title = {Reuse or rewrite: Combining textual, static, and dynamic analyses to assess the cost of keeping a system up-to-date},
      booktitle = {ICSM},
      year = {2008},
      pages = {147-156},
      ee = {http://dx.doi.org/10.1109/ICSM.2008.4658063},
      crossref = {DBLP:conf/icsm/2008},
      bibsource = {DBLP, http://dblp.uni-trier.de}
    }
    
      
  70. [70]C. D. Grosso, G. Antoniol, E. Merlo, and P. Galinier, “Detecting buffer overflow via automatic test input data generation,” Computers & OR, vol. 35, no. 10, pp. 3125–3143, 2008.
    Bibtex
      @article{journals/cor/GrossoAMG08,
      author = {Grosso, Concettina Del and Antoniol, Giuliano and Merlo, Ettore and Galinier, Philippe},
      title = {Detecting buffer overflow via automatic test input data generation},
      journal = {Computers {\&} OR},
      volume = {35},
      number = {10},
      year = {2008},
      pages = {3125-3143},
      ee = {http://dx.doi.org/10.1016/j.cor.2007.01.013},
      bibsource = {DBLP, http://dblp.uni-trier.de}
    }
    
      
    Abstract Buffer overflows cause serious problems in various categories of software systems. In critical systems, such as health-care, nuclear or aerospace software applications, a buffer overflow may cause severe threats to humans or severe economic losses. If they occur in network or security applications, they can be exploited to gain administrator privileges, perform system attacks, access unauthorized data, or misuse the system. This paper proposes a combination of genetic algorithms, linear programming, evolutionary testing, and static and dynamic information to detect buffer overflows. The newly proposed test input generation process avoids the need for human intervention to define and tune genetic algorithm weights and therefore it becomes completely automated. The process that guides the genetic search towards the detection of buffer overflow relies on a fitness function that takes into account static and dynamic information. Reported results of our case studies, consisting of two sets of open-source programs, show that the new process and fitness function outperform previously published approaches.
  71. [71]Y.-G. Guéhéneuc and G. Antoniol, “DeMIMA: A Multilayered Approach for Design Pattern Identification,” IEEE Trans. Software Eng., vol. 34, no. 5, pp. 667–684, 2008.
    Bibtex
      @article{journals/tse/GueheneucA08,
      author = {Gu{\'e}h{\'e}neuc, Yann-Ga{\"e}l and Antoniol, Giuliano},
      title = {DeMIMA: A Multilayered Approach for Design Pattern Identification},
      journal = {IEEE Trans. Software Eng.},
      volume = {34},
      number = {5},
      year = {2008},
      pages = {667-684},
      ee = {http://doi.ieeecomputersociety.org/10.1109/TSE.2008.48},
      bibsource = {DBLP, http://dblp.uni-trier.de}
    }
    
      
    Abstract Design patterns are important in object-oriented programming because they offer design motifs, elegant solutions to recurrent design problems, which improve the quality of software systems. Design motifs facilitate system maintenance by helping to understand design and implementation. However, after implementation, design motifs are spread throughout the source code and are thus not directly available to maintainers. We present DeMIMA, an approach to identify semi-automatically micro-architectures that are similar to design motifs in source code and to ensure the traceability of these micro-architectures between implementation and design. DeMIMA consists of three layers: two layers to recover an abstract model of the source code, including binary class relationships, and a third layer to identify design patterns in the abstract model. We apply DeMIMA to five open-source systems and, on average, we observe 34% precision for the considered 12 design motifs. Through the use of explanation-based constraint programming, DeMIMA ensures 100% recall on the five systems. We also apply DeMIMA on 33 industrial components.
  72. [72]M. D. Penta, L. Cerulo, Y.-G. Guéhéneuc, and G. Antoniol, “An empirical study of the relationships between design pattern roles and class change proneness,” in ICSM, 2008, pp. 217–226.
    Bibtex
      @inproceedings{04658070,
      author = {Penta, Massimiliano Di and Cerulo, Luigi and Gu{\'e}h{\'e}neuc, Yann-Ga{\"e}l and Antoniol, Giuliano},
      title = {An empirical study of the relationships between design pattern roles and class change proneness},
      booktitle = {ICSM},
      year = {2008},
      pages = {217-226},
      ee = {http://dx.doi.org/10.1109/ICSM.2008.4658070},
      crossref = {DBLP:conf/icsm/2008},
      bibsource = {DBLP, http://dblp.uni-trier.de},
      pdf = {2008/04658070.pdf}
    }
    
      
    Abstract Analyzing the change-proneness of design patterns and the kinds of changes occurring to classes playing role(s) in some design pattern(s) during software evolution poses the basis for guidelines to help developers who have to choose, apply or maintain design patterns. Building on previous work, this paper shifts the focus from design patterns as wholes to the finer-grain level of design pattern roles. It presents an empirical study to understand whether there are roles that are more change-prone than others and whether there are changes that are more likely to occur to certain roles. It relies on data extracted from the source code repositories of three different systems (JHotDraw, Xerces, and Eclipse-JDT) and from 12 design patterns.
  73. [73]B. Kenmei, G. Antoniol, and M. D. Penta, “Trend Analysis and Issue Prediction in Large-Scale Open Source Systems,” in CSMR, 2008, pp. 73–82.
    Bibtex
      @inproceedings{04493302,
      author = {Kenmei, B{\'e}n{\'e}dicte and Antoniol, Giuliano and Penta, Massimiliano Di},
      title = {Trend Analysis and Issue Prediction in Large-Scale Open Source Systems},
      booktitle = {CSMR},
      year = {2008},
      pages = {73-82},
      ee = {http://dx.doi.org/10.1109/CSMR.2008.4493302},
      crossref = {DBLP:conf/csmr/2008},
      bibsource = {DBLP, http://dblp.uni-trier.de},
      pdf = {2008/04493302.pdf}
    }
    
      
    Abstract Effort to evolve and maintain a software system is likely to vary depending on the amount and frequency of change requests. This paper proposes to model change requests as time series and to rely on time series mathematical framework to analyze and model them. In particular, this paper focuses on the number of new change requests per KLOC and per unit of time. Time series can have a two-fold application: they can be used to forecast future values and to identify trends. Increasing trends can indicate an increase in customer requests for new features or a decrease in the software system quality. A decreasing trend can indicate application stability and maturity, but also a reduced popularity and adoption. The paper reports case studies over about five years for three large open source applications: Eclipse, Mozilla and JBoss. The case studies show the capability of time series to model change request density and provide empirical evidence of an increasing trend in newly opened change requests in the JBoss application framework.
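    A small sketch of the time-series view described above, assuming statsmodels is available; the monthly change-request densities are invented and ARIMA(1,1,1) is just one plausible model choice, not the paper's.
      import pandas as pd
      from statsmodels.tsa.arima.model import ARIMA

      # Hypothetical monthly change-request density (new requests per KLOC).
      density = pd.Series(
          [0.8, 0.9, 1.1, 1.0, 1.3, 1.2, 1.5, 1.4, 1.6, 1.8, 1.7, 2.0],
          index=pd.period_range("2004-01", periods=12, freq="M"),
      )

      model = ARIMA(density, order=(1, 1, 1)).fit()  # fit a simple ARIMA model
      print(model.forecast(steps=6))                 # forecast the next six months
      # A rising trend in the forecast would suggest growing demand for new
      # features or declining quality, as discussed in the abstract.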
  74. [74]G. Antoniol, K. Ayari, M. D. Penta, F. Khomh, and Y.-G. Guéhéneuc, “Is it a bug or an enhancement?: a text-based approach to classify change requests,” in CASCON, 2008, p. 23.
    Bibtex
      @inproceedings{conf/cascon/AntoniolAPKG08,
      author = {Antoniol, Giuliano and Ayari, Kamel and Penta, Massimiliano Di and Khomh, Foutse and Gu{\'e}h{\'e}neuc, Yann-Ga{\"e}l},
      title = {Is it a bug or an enhancement?: a text-based approach to classify change requests},
      booktitle = {CASCON},
      year = {2008},
      pages = {23},
      ee = {http://doi.acm.org/10.1145/1463788.1463819},
      crossref = {DBLP:conf/cascon/2008},
      bibsource = {DBLP, http://dblp.uni-trier.de}
    }
    
      
    Abstract Bug tracking systems are valuable assets for managing maintenance activities. They are widely used in open-source projects as well as in the software industry. They collect many different kinds of issues: requests for defect fixing, enhancements, refactoring/restructuring activities and organizational issues. These different kinds of issues are simply labeled as "bug" for lack of a better classification support or of knowledge about the possible kinds. This paper investigates whether the text of the issues posted in bug tracking systems is enough to classify them into corrective maintenance and other kinds of activities. We show that alternating decision trees, naive Bayes classifiers, and logistic regression can be used to accurately distinguish bugs from other kinds of issues. Results from empirical studies performed on issues for Mozilla, Eclipse, and JBoss indicate that issues can be classified with between 77% and 82% of correct decisions.
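    A toy sketch of the text-based classification idea, using a TF-IDF plus naive Bayes pipeline from scikit-learn; the issue texts and labels are invented, and the paper also evaluates alternating decision trees and logistic regression.
      from sklearn.feature_extraction.text import TfidfVectorizer
      from sklearn.naive_bayes import MultinomialNB
      from sklearn.pipeline import make_pipeline

      # Invented issue texts; the study used real Mozilla, Eclipse and JBoss issues.
      texts = [
          "crash when opening malformed file",
          "null pointer exception on startup",
          "add dark theme support to the editor",
          "please support export to PDF",
      ]
      labels = ["bug", "bug", "enhancement", "enhancement"]

      clf = make_pipeline(TfidfVectorizer(), MultinomialNB())
      clf.fit(texts, labels)
      print(clf.predict(["segmentation fault while saving",
                         "feature request: keyboard shortcuts"]))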
  75. [75]M. D. Penta, G. Antoniol, and M. Harman, “Special Issue on Search-Based Software Maintenance,” Journal of Software Maintenance, vol. 20, no. 5, pp. 317–319, 2008.
    Bibtex
      @article{journals/smr/PentaAH08,
      author = {Penta, Massimiliano Di and Antoniol, Giuliano and Harman, Mark},
      title = {Special Issue on Search-Based Software Maintenance},
      journal = {Journal of Software Maintenance},
      volume = {20},
      number = {5},
      year = {2008},
      pages = {317-319},
      ee = {http://dx.doi.org/10.1002/smr.395},
      bibsource = {DBLP, http://dblp.uni-trier.de}
    }
    
      
  76. [76]S. Kpodjedo, F. Ricca, P. Galinier, and G. Antoniol, “Error Correcting Graph Matching Application to Software Evolution,” in WCRE, 2008, pp. 289–293.
    Bibtex
      @inproceedings{conf/wcre/KpodjedoRGA08,
      author = {Kpodjedo, Segla and Ricca, Filippo and Galinier, Philippe and Antoniol, Giuliano},
      title = {Error Correcting Graph Matching Application to Software Evolution},
      booktitle = {WCRE},
      year = {2008},
      pages = {289-293},
      ee = {http://dx.doi.org/10.1109/WCRE.2008.48},
      crossref = {DBLP:conf/wcre/2008},
      bibsource = {DBLP, http://dblp.uni-trier.de}
    }
    
      
    Abstract Graph representations and graph algorithms are widely adopted to model and resolve problems in many different areas from telecommunications, to bio-informatics, to civil and software engineering. Many software artifacts such as the class diagram can be thought of as graphs and thus, many software evolution problems can be reformulated as a graph matching problem. In this paper, we investigate the applicability of an error-correcting graph matching algorithm to object-oriented software evolution and report results obtained on a small system — the Latazza application — supporting applicability and usefulness of our proposal.
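    A minimal sketch of the underlying idea, assuming networkx: two versions of a class diagram are compared through graph edit distance, the cost of node and edge insertions, deletions and substitutions needed to turn one into the other. The example graphs are invented and this is not the paper's error-correcting matching algorithm.
      import networkx as nx

      def diagram(edges):
          """Build a toy class diagram: nodes are classes, edges relationships."""
          g = nx.DiGraph()
          for a, b in edges:
              g.add_node(a, name=a)
              g.add_node(b, name=b)
              g.add_edge(a, b)
          return g

      v1 = diagram([("Order", "Item"), ("Order", "Customer")])
      v2 = diagram([("Order", "Item"), ("Order", "Client"), ("Item", "Product")])

      # Lower edit distance means the two versions are structurally closer.
      print(nx.graph_edit_distance(
          v1, v2, node_match=lambda a, b: a["name"] == b["name"]))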
  77. [77]J. H. Hayes, G. Antoniol, and Y.-G. Guéhéneuc, “PREREQIR: Recovering Pre-Requirements via Cluster Analysis,” in WCRE, 2008, pp. 165–174.
    Bibtex
      @inproceedings{04656406,
      author = {Hayes, Jane Huffman and Antoniol, Giuliano and Gu{\'e}h{\'e}neuc, Yann-Ga{\"e}l},
      title = {PREREQIR: Recovering Pre-Requirements via Cluster Analysis},
      booktitle = {WCRE},
      year = {2008},
      pages = {165-174},
      ee = {http://dx.doi.org/10.1109/WCRE.2008.36},
      crossref = {DBLP:conf/wcre/2008},
      bibsource = {DBLP, http://dblp.uni-trier.de},
      pdf = {2008/04656406.pdf}
    }
    
      
    Abstract High-level software artifacts, such as requirements, domain-specific requirements, and so on, are an important source of information that is often neglected during the reverse- and re-engineering processes. We posit that domain specific pre-requirements information (PRI) can be obtained by eliciting the stakeholders’ understanding of generic systems or domains. We discuss the semi-automatic recovery of domain-specific PRI that can then be used during reverse and re-engineering, for example, to recover traceability links or to assess the degree of obsolescence of a system with respect to competing systems and the clients’ expectations. We present a method using partition around medoids and agglomerative clustering for obtaining, structuring, analyzing, and labeling textual PRI from a group of diverse stakeholders. We validate our method using PRI for the development of a generic Web browser provided by 22 different stakeholders. We show that, for a similarity threshold of about 0.36, about 55% of the PRI were common to two or more stakeholders and 42% were outliers. We automatically label the common and outlier PRI (82% correctly labeled), and obtain 74% accuracy for the similarity threshold of 0.36 (78% for a threshold of 0.5). We assess the recall and precision of the method, and compare the labeled PRI to a generic Web browser requirements specification.
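    A minimal sketch loosely inspired by the clustering step described above, not the paper's pipeline: invented pre-requirements statements are vectorized with TF-IDF and grouped by average-linkage agglomerative clustering, cutting at a cosine distance of about 0.64 (i.e., a similarity of roughly 0.36).
      from sklearn.feature_extraction.text import TfidfVectorizer
      from scipy.cluster.hierarchy import linkage, fcluster
      from scipy.spatial.distance import pdist

      # Invented pre-requirements statements from different stakeholders.
      pri = [
          "the browser shall display HTML pages",
          "render web pages written in HTML",
          "users can bookmark favourite pages",
          "allow saving bookmarks for later use",
          "the browser must support printing",
      ]

      vectors = TfidfVectorizer().fit_transform(pri).toarray()
      distances = pdist(vectors, metric="cosine")

      # Statements closer than the threshold fall into the same group.
      groups = fcluster(linkage(distances, method="average"),
                        t=0.64, criterion="distance")
      print(groups)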
  78. [78]M. Eaddy, A. V. Aho, G. Antoniol, and Y.-G. Guéhéneuc, “CERBERUS: Tracing Requirements to Source Code Using Information Retrieval, Dynamic Analysis, and Program Analysis,” in ICPC, 2008, pp. 53–62.
    Bibtex
      @inproceedings{conf/iwpc/EaddyAAG08,
      author = {Eaddy, Marc and Aho, Alfred V. and Antoniol, Giuliano and Gu{\'e}h{\'e}neuc, Yann-Ga{\"e}l},
      title = {CERBERUS: Tracing Requirements to Source Code Using Information Retrieval, Dynamic Analysis, and Program Analysis},
      booktitle = {ICPC},
      year = {2008},
      pages = {53-62},
      ee = {http://dx.doi.org/10.1109/ICPC.2008.39},
      crossref = {DBLP:conf/iwpc/2008},
      bibsource = {DBLP, http://dblp.uni-trier.de}
    }
    
      
    Abstract The concern location problem is to identify the source code within a program related to the features, requirements, or other concerns of the program. This problem is central to program development and maintenance. We present a new technique called prune dependency analysis that can be combined with existing techniques to dramatically improve the accuracy of concern location. We developed CERBERUS, a potent hybrid technique for concern location that combines information retrieval, execution tracing, and prune dependency analysis. We used CERBERUS to trace the 360 requirements of RHINO, a 32,134 line Java program that implements the ECMAScript international standard. In our experiment, prune dependency analysis boosted the recall of information retrieval by 155% and execution tracing by 104%. Moreover, we show that our combined technique outperformed the other techniques when run individually or in pairs.
  79. [79]D. Poshyvanyk, Y.-G. Guéhéneuc, A. Marcus, G. Antoniol, and V. Rajlich, “Feature Location Using Probabilistic Ranking of Methods Based on Execution Scenarios and Information Retrieval,” IEEE Trans. Software Eng., vol. 33, no. 6, pp. 420–432, 2007.
    Bibtex
      @article{journals/tse/PoshyvanykGMAR07,
      author = {Poshyvanyk, Denys and Gu{\'e}h{\'e}neuc, Yann-Ga{\"e}l and Marcus, Andrian and Antoniol, Giuliano and Rajlich, V{\'a}clav},
      title = {Feature Location Using Probabilistic Ranking of Methods Based on Execution Scenarios and Information Retrieval},
      journal = {IEEE Trans. Software Eng.},
      volume = {33},
      number = {6},
      year = {2007},
      pages = {420-432},
      ee = {http://doi.ieeecomputersociety.org/10.1109/TSE.2007.1016},
      bibsource = {DBLP, http://dblp.uni-trier.de}
    }
    
      
    Abstract This paper recasts the problem of feature location in source code as a decision-making problem in the presence of uncertainty. The solution to the problem is formulated as a combination of the opinions of different experts. The experts in this work are two existing techniques for feature location: a scenario-based probabilistic ranking of events and an information-retrieval-based technique that uses Latent Semantic Indexing. The combination of these two experts is empirically evaluated through several case studies, which use the source code of the Mozilla Web browser and the Eclipse integrated development environment. The results show that the combination of experts significantly improves the effectiveness of feature location as compared to each of the experts used independently.
  80. [80]G. Antoniol, Y.-G. Guéhéneuc, E. Merlo, and P. Tonella, “Mining the Lexicon Used by Programmers during Software Evolution,” in ICSM, 2007, pp. 14–23.
    Bibtex
      @inproceedings{conf/icsm/AntoniolGMT07,
      author = {Antoniol, Giuliano and Gu{\'e}h{\'e}neuc, Yann-Ga{\"e}l and Merlo, Ettore and Tonella, Paolo},
      title = {Mining the Lexicon Used by Programmers during Software Evolution},
      booktitle = {ICSM},
      year = {2007},
      pages = {14-23},
      ee = {http://dx.doi.org/10.1109/ICSM.2007.4362614},
      crossref = {DBLP:conf/icsm/2007},
      bibsource = {DBLP, http://dblp.uni-trier.de}
    }
    
      
  81. [81]S. Bellon, R. Koschke, G. Antoniol, J. Krinke, and E. Merlo, “Comparison and Evaluation of Clone Detection Tools,” IEEE Trans. Software Eng., vol. 33, no. 9, pp. 577–591, 2007.
    Bibtex
      @article{04288192,
      author = {Bellon, Stefan and Koschke, Rainer and Antoniol, Giuliano and Krinke, Jens and Merlo, Ettore},
      title = {Comparison and Evaluation of Clone Detection Tools},
      journal = {IEEE Trans. Software Eng.},
      volume = {33},
      number = {9},
      year = {2007},
      pages = {577-591},
      ee = {http://doi.ieeecomputersociety.org/10.1109/TSE.2007.70725},
      bibsource = {DBLP, http://dblp.uni-trier.de},
      pdf = {2007/04288192.pdf}
    }
    
      
    Abstract Many techniques for detecting duplicated source code (software clones) have been proposed in the past. However, it is not yet clear how these techniques compare in terms of recall and precision as well as space and time requirements. This paper presents an experiment that evaluates six clone detectors based on eight large C and Java programs (altogether almost 850 KLOC). Their clone candidates were evaluated by one of the authors as an independent third party. The selected techniques cover the whole spectrum of the state-of-the-art in clone detection. The techniques work on text, lexical and syntactic information, software metrics, and program dependency graphs.
  82. [82]K. Ayari, P. Meshkinfam, G. Antoniol, and M. D. Penta, “Threats on building models from CVS and Bugzilla repositories: the Mozilla case study,” in CASCON, 2007, pp. 215–228.
    Bibtex
      @inproceedings{p215-ayari,
      author = {Ayari, Kamel and Meshkinfam, Peyman and Antoniol, Giuliano and Penta, Massimiliano Di},
      title = {Threats on building models from CVS and Bugzilla repositories: the Mozilla case study},
      booktitle = {CASCON},
      year = {2007},
      pages = {215-228},
      ee = {http://doi.acm.org/10.1145/1321211.1321234},
      crossref = {DBLP:conf/cascon/2007},
      bibsource = {DBLP, http://dblp.uni-trier.de},
      pdf = {2007/p215-ayari.pdf}
    }
    
      
    Abstract Information obtained by merging data extracted from problem reporting systems – such as Bugzilla – and versioning systems – such as the Concurrent Version System (CVS) – is widely used in quality assessment approaches. This paper attempts to shed some light on the threats and difficulties faced when trying to integrate information extracted from the Mozilla CVS and bug repositories. Indeed, the heterogeneity of Mozilla bug reports, which often deal with non-defect issues and lack traceable information, may undermine the validity of quality assessment approaches relying on repository integration. In the reported Mozilla case study, we observed that available integration heuristics are unable to recover thousands of traceability links. Furthermore, Bugzilla classification mechanisms do not enforce a distinction between different kinds of maintenance activities. The obtained evidence suggests that a large amount of information is lost; we conjecture that to benefit from CVS and problem reporting systems, more systematic issue classification and more reliable traceability mechanisms are needed.
  83. [83]E. Merlo, D. Letarte, and G. Antoniol, “Automated Protection of PHP Applications Against SQL-injection Attacks,” in CSMR, 2007, pp. 191–202.
    Bibtex
      @inproceedings{04145037,
      author = {Merlo, Ettore and Letarte, Dominic and Antoniol, Giuliano},
      title = {Automated Protection of PHP Applications Against SQL-injection Attacks},
      booktitle = {CSMR},
      year = {2007},
      pages = {191-202},
      ee = {http://dx.doi.org/10.1109/CSMR.2007.16, http://doi.ieeecomputersociety.org/10.1109/CSMR.2007.16},
      crossref = {DBLP:conf/csmr/2007},
      bibsource = {DBLP, http://dblp.uni-trier.de},
      pdf = {2007/04145037.pdf}
    }
    
      
    Abstract Web sites may be static sites, programs, or databases, and very often a combination of the three, integrating relational databases as a back-end. Web sites require care in configuration and programming to assure security, confidentiality, and trustworthiness of the published information. SQL-injection attacks exploit weak validation of textual input used to build database queries. Maliciously crafted input may threaten the confidentiality and the security policies of Web sites relying on a database to store and retrieve information. This paper presents an original approach that combines static analysis, dynamic analysis, and code reengineering to automatically protect applications written in PHP from SQL-injection attacks. The paper also reports preliminary results of experiments performed on an old SQL-injection prone version of phpBB (version 2.0.0, 37193 LOC of PHP version 4.2.2 code). Results show that our approach successfully improved phpBB-2.0.0 resistance to SQL-injection attacks.
  84. [84]R. Oliveto, G. Antoniol, A. Marcus, and J. H. Hayes, “Software Artefact Traceability: the Never-Ending Challenge,” in ICSM, 2007, pp. 485–488.
    Bibtex
      @inproceedings{04362664,
      author = {Oliveto, Rocco and Antoniol, Giuliano and Marcus, Andrian and Hayes, Jane Huffman},
      title = {Software Artefact Traceability: the Never-Ending Challenge},
      booktitle = {ICSM},
      year = {2007},
      pages = {485-488},
      ee = {http://dx.doi.org/10.1109/ICSM.2007.4362664},
      crossref = {DBLP:conf/icsm/2007},
      bibsource = {DBLP, http://dblp.uni-trier.de},
      pdf = {2007/04362664.pdf}
    }
    
      
    Abstract Software artefact traceability is widely recognised as an important factor for the effective development and maintenance of a software system. Unfortunately, the lack of automatic or semi-automatic supports makes the task of maintaining links among software artefacts a tedious and time consuming one. For this reason, traceability information often becomes out of date or is completely absent during software development. In this working session, we discuss problems and challenges related to various aspects of traceability in software systems.
  85. [85]K. Ayari, S. Bouktif, and G. Antoniol, “Automatic mutation test input data generation via ant colony,” in GECCO, 2007, pp. 1074–1081.
    Bibtex
      @inproceedings{conf/gecco/AyariBA07,
      author = {Ayari, Kamel and Bouktif, Salah and Antoniol, Giuliano},
      title = {Automatic mutation test input data generation via ant colony},
      booktitle = {GECCO},
      year = {2007},
      pages = {1074-1081},
      ee = {http://doi.acm.org/10.1145/1276958.1277172},
      crossref = {DBLP:conf/gecco/2007},
      bibsource = {DBLP, http://dblp.uni-trier.de}
    }
    
      
    Abstract Fault-based testing is often advocated to overcome limitations of other testing approaches; however it is also recognized as being expensive. On the other hand, evolutionary algorithms have been proved suitable for reducing the cost of data generation in the context of coverage based testing. In this paper, we propose a new evolutionary approach based on ant colony optimization for automatic test input data generation in the context of mutation testing to reduce the cost of such a test strategy. In our approach the ant colony optimization algorithm is enhanced by a probability density estimation technique. We compare our proposal with other evolutionary algorithms, e.g., Genetic Algorithm. Our preliminary results on JAVA testbeds show that our approach performed significantly better than other alternatives.
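    A toy pheromone-reinforcement sketch of the general ant colony idea, not the paper's algorithm (which adds probability density estimation): ants sample integer inputs, inputs that kill a stubbed mutant get their pheromone reinforced, and the trail evaporates each round.
      import random

      VALUES = list(range(-10, 11))          # candidate integer inputs
      pheromone = {v: 1.0 for v in VALUES}   # initial pheromone trail

      def kills_mutant(x):
          """Placeholder oracle: True if input x distinguishes the original
          program from a mutant (here, a toy boundary condition)."""
          return x == 0

      def sample():
          total = sum(pheromone.values())
          r, acc = random.uniform(0, total), 0.0
          for v, tau in pheromone.items():
              acc += tau
              if r <= acc:
                  return v
          return VALUES[-1]

      for _ in range(200):                   # each iteration: one ant, one input
          x = sample()
          for v in pheromone:
              pheromone[v] *= 0.95           # evaporation
          if kills_mutant(x):
              pheromone[x] += 1.0            # reinforce mutant-killing inputs

      print(max(pheromone, key=pheromone.get))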
  86. [86]E. Merlo, D. Letarte, and G. Antoniol, “SQL-Injection Security Evolution Analysis in PHP,” in WSE, 2007, pp. 45–49.
    Bibtex
      @inproceedings{04380243,
      author = {Merlo, Ettore and Letarte, Dominic and Antoniol, Giuliano},
      title = {SQL-Injection Security Evolution Analysis in PHP},
      booktitle = {WSE},
      year = {2007},
      pages = {45-49},
      ee = {http://dx.doi.org/10.1109/WSE.2007.4380243},
      crossref = {DBLP:conf/wse/2007},
      bibsource = {DBLP, http://dblp.uni-trier.de},
      pdf = {2007/04380243.pdf}
    }
    
      
    Abstract Web sites are often a mixture of static sites and programs that integrate relational databases as a back-end. Software that implements Web sites continuously evolves to meet ever-changing user needs. As Web sites evolve, new versions of programs, interactions and functionalities are added and existing ones are removed or modified. Web sites require configuration and programming attention to assure security, confidentiality, and trustworthiness of the published information. During the evolution of Web software, from one version to the next, security flaws may be introduced, corrected, or ignored. This paper presents an investigation of the evolution of security vulnerabilities as detected by propagating and combining granted authorization levels along an inter-procedural control flow graph (CFG), together with the security levels required for DB accesses, with respect to SQL-injection attacks. The paper reports results of experiments performed on 31 versions of phpBB, a publicly available bulletin board written in PHP; versions from 1.0.0 (9547 LOC) to 2.0.22 (40663 LOC) have been considered as a case study. Results show that the vulnerability analysis can be used to observe and monitor the evolution of security vulnerabilities in subsequent versions of the same software package. Suggestions for further research are also presented.
  87. [87]M. D. Penta, M. Harman, G. Antoniol, and F. Qureshi, “The Effect of Communication Overhead on Software Maintenance Project Staffing: a Search-Based Approach,” in ICSM, 2007, pp. 315–324.
    Bibtex
      @inproceedings{conf/icsm/PentaHAQ07,
      author = {Penta, Massimiliano Di and Harman, Mark and Antoniol, Giuliano and Qureshi, Fahim},
      title = {The Effect of Communication Overhead on Software Maintenance Project Staffing: a Search-Based Approach},
      booktitle = {ICSM},
      year = {2007},
      pages = {315-324},
      ee = {http://dx.doi.org/10.1109/ICSM.2007.4362644},
      crossref = {DBLP:conf/icsm/2007},
      bibsource = {DBLP, http://dblp.uni-trier.de}
    }
    
      
    Abstract Brooks’ milestone ‘Mythical Man Month’ established the observation that there is no simple conversion between people and time in large scale software projects. Communication and training overheads yield a subtle and variable relationship between the person-months required for a project and the number of people needed to complete the task within a given time frame. This paper formalises several instantiations of Brooks’ law and uses these to construct project schedule and staffing instances — using a search-based project staffing and scheduling approach — on data from two large real world maintenance projects. The results reveal the impact of different formulations of Brooks’ law on project completion time and on staff distribution across teams, and the influence of other factors such as the presence of dependencies between work packages on the effect of communication overhead.
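    A toy illustration of the underlying observation, not one of the paper's formalisations: if every pair of developers adds some communication overhead, calendar time stops shrinking linearly with team size. The functional form and constants below are assumptions.
      def completion_months(person_months, team_size, overhead_per_pair=0.05):
          """Inflate the nominal effort by a per-pair communication overhead,
          then divide the inflated effort across the team."""
          pairs = team_size * (team_size - 1) / 2
          effective = person_months * (1 + overhead_per_pair * pairs / team_size)
          return effective / team_size

      # Doubling the team never halves the calendar time.
      for n in (2, 4, 8, 16):
          print(n, "people:", round(completion_months(120, n), 1), "months")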
  88. [88]G. Antoniol, “Requiem for software evolution research: a few steps toward the creative age,” in IWPSE, 2007, pp. 1–3.
    Bibtex
      @inproceedings{p1-antoniol,
      author = {Antoniol, Giuliano},
      title = {Requiem for software evolution research: a few steps toward the creative age},
      booktitle = {IWPSE},
      year = {2007},
      pages = {1-3},
      ee = {http://doi.acm.org/10.1145/1294948.1294950},
      crossref = {DBLP:conf/iwpse/2007},
      bibsource = {DBLP, http://dblp.uni-trier.de},
      pdf = {2007/p1-antoniol.pdf}
    }
    
      
    Abstract Nowadays almost every company depends on software technologies to function; the challenge is that these technologies and software applications are constantly changing and adapting to the needs of users. This process of change is risky, since unplanned and undisciplined changes in any software system of realistic size risk degrading the quality of the software and producing unexpected side effects. The need for disciplined, intelligent, cost-effective software change and evolution is an urgent technological challenge in the software engineering field. New technologies, new social and cultural trends, the widespread adoption of open source software, market globalization and new development environments are spelling the requiem for the traditional way in which software evolution research was carried out. Evolution research must evolve and adapt to new societal needs and trends, thus turning challenges into opportunities. This keynote attempts to shed some light on key factors such as new technology transfer opportunities, the need for benchmarks, and the three items each and every research program in software evolution should integrate in one way or another.
  89. [89]G. Antoniol and M. Ceccarelli, “Microarray image gridding with stochastic search based approaches,” Image Vision Comput., vol. 25, no. 2, pp. 155–163, 2007.
    Bibtex
      @article{1-s2.0-S0262885606000710-main,
      author = {Antoniol, Giuliano and Ceccarelli, Michele},
      title = {Microarray image gridding with stochastic search based approaches},
      journal = {Image Vision Comput.},
      volume = {25},
      number = {2},
      year = {2007},
      pages = {155-163},
      ee = {http://dx.doi.org/10.1016/j.imavis.2006.01.023},
      bibsource = {DBLP, http://dblp.uni-trier.de},
      pdf = {2007/1-s2.0-S0262885606000710-main.pdf}
    }
    
      
    Abstract The paper reports a novel approach to the problem of automatic gridding in microarray images. This problem often requires human intervention; therefore, the development of automated procedures is a fundamental issue for large scale functional genomic experiments involving many microarray images. Our method uses a two step process. First, a regular rectangular grid is superimposed on the image by interpolating a set of guide spots; this is done by solving a non-linear optimization problem with a stochastic search, producing the best interpolating grid parametrized by a vector of six values. Second, the interpolating grid is adapted, with a Markov Chain Monte Carlo method, to local deformations. This is done by modeling the solution as a Markov Random Field with a Gibbs prior possibly containing first order cliques (1-cliques). The algorithm is completely automatic and no human intervention is required; it efficiently accounts for arbitrary grid rotations, irregularities and various spot sizes.
  90. [90]S. Bouktif, Y.-G. Guéhéneuc, and G. Antoniol, “Extracting Change-patterns from CVS Repositories,” in WCRE, 2006, pp. 221–230.
    Bibtex
      @inproceedings{04023992,
      author = {Bouktif, Salah and Gu{\'e}h{\'e}neuc, Yann-Ga{\"e}l and Antoniol, Giuliano},
      title = {Extracting Change-patterns from CVS Repositories},
      booktitle = {WCRE},
      year = {2006},
      pages = {221-230},
      ee = {http://doi.ieeecomputersociety.org/10.1109/WCRE.2006.27},
      crossref = {DBLP:conf/wcre/2006},
      bibsource = {DBLP, http://dblp.uni-trier.de},
      pdf = {2006/04023992.pdf}
    }
    
      
    Abstract Often, the only sources of information about the evolution of software systems are the systems themselves and their histories. Version control repositories contain information on several thousands of files and on millions of changes. We propose an approach based on dynamic time warping to discover change-patterns, which, for example, describe files that change together almost all the time. We define the Synchrony change-pattern to answer the question: given a software system and one file under modification, what other files must be changed? We have applied our approach on PADL, a software system developed in Java, and on Mozilla. Interesting results are achieved even when the discovered groups of co-changing files are compared with those provided by experts.
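    A minimal sketch of the dynamic-time-warping ingredient only, not the paper's full change-pattern mining: a classic DTW distance over invented weekly change counts, where a small distance between two files' histories suggests they co-change.
      import numpy as np

      def dtw(a, b):
          """Dynamic-time-warping distance between two change histories."""
          n, m = len(a), len(b)
          d = np.full((n + 1, m + 1), np.inf)
          d[0, 0] = 0.0
          for i in range(1, n + 1):
              for j in range(1, m + 1):
                  cost = abs(a[i - 1] - b[j - 1])
                  d[i, j] = cost + min(d[i - 1, j], d[i, j - 1], d[i - 1, j - 1])
          return d[n, m]

      # Invented weekly change counts: f1 and f2 co-change, f3 does not.
      f1 = [0, 2, 3, 0, 1, 4, 0]
      f2 = [0, 1, 3, 0, 2, 4, 1]
      f3 = [5, 0, 0, 4, 0, 0, 5]
      print(dtw(f1, f2), dtw(f1, f3))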
  91. [91]S. Bouktif, G. Antoniol, E. Merlo, and M. Neteler, “A novel approach to optimize clone refactoring activity,” in GECCO, 2006, pp. 1885–1892.
    Bibtex
      @inproceedings{p1885-bouktif,
      author = {Bouktif, Salah and Antoniol, Giuliano and Merlo, Ettore and Neteler, Markus},
      title = {A novel approach to optimize clone refactoring activity},
      booktitle = {GECCO},
      year = {2006},
      pages = {1885-1892},
      ee = {http://doi.acm.org/10.1145/1143997.1144312},
      crossref = {DBLP:conf/gecco/2006},
      bibsource = {DBLP, http://dblp.uni-trier.de},
      pdf = {2006/p1885-bouktif.pdf}
    }
    
      
    Abstract Software evolution and software quality are ever changing phenomena. As software evolves, evolution impacts software quality. On the other hand, software quality needs may drive software evolution strategies. This paper presents an approach to schedule quality improvement under constraints and priorities. The general problem of scheduling quality improvement has been instantiated into the concrete problem of planning duplicated code removal in a geographical information system developed in C throughout the last 20 years. Priorities and constraints arise from the development team and from the adopted development process. The development team's long term goal is to get rid of duplicated code, improve software structure, decrease coupling, and improve cohesion. We present our problem formulation, the adopted approach, including a model of clone removal effort, and preliminary results obtained on a real world application.
  92. [92]E. Merlo, D. Letarte, and G. Antoniol, “Insider and Outsider Threat-Sensitive SQL Injection Vulnerability Analysis in PHP,” in WCRE, 2006, pp. 147–156.
    Bibtex
      @inproceedings{04023985,
      author = {Merlo, Ettore and Letarte, Dominic and Antoniol, Giuliano},
      title = {Insider and Outsider Threat-Sensitive SQL Injection Vulnerability Analysis in PHP},
      booktitle = {WCRE},
      year = {2006},
      pages = {147-156},
      ee = {http://doi.ieeecomputersociety.org/10.1109/WCRE.2006.33},
      crossref = {DBLP:conf/wcre/2006},
      bibsource = {DBLP, http://dblp.uni-trier.de},
      pdf = {2006/04023985.pdf}
    }
    
      
    Abstract In general, SQL-injection attacks rely on some weak validation of textual input used to build database queries. Maliciously crafted input may threaten the confidentiality and the security policies of Web sites relying on a database to store and retrieve information. Furthermore, insiders may introduce malicious code in a Web application, code that, when triggered by some specific input, for example, would violate security policies. This paper presents an original approach based on static analysis to automatically detect statements in PHP applications that may be vulnerable to SQL-injections triggered by either malicious input (outsider threats) or malicious code (insider threats). Original flow analysis equations, that propagate and combine security levels along an inter-procedural control flow graph (CFG), are presented. The computation of security levels presents linear execution time and memory complexity.
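    A minimal worklist sketch in the spirit of propagating security levels over a control flow graph; the graph, transfer effects and two-level lattice below are invented, and this is not the paper's set of flow equations.
      # SAFE < UNSAFE; merging paths keeps the worst (maximum) level.
      SAFE, UNSAFE = 0, 1

      cfg = {                                   # node -> successors
          "entry": ["read_input"],
          "read_input": ["sanitize", "build_query"],
          "sanitize": ["build_query"],
          "build_query": ["exec_sql"],
          "exec_sql": [],
      }
      effect = {"read_input": UNSAFE, "sanitize": SAFE}  # nodes that set the level

      level = {n: SAFE for n in cfg}
      work = list(cfg)
      while work:
          n = work.pop()
          out = effect.get(n, level[n])         # set the level, or pass it through
          for succ in cfg[n]:
              new = max(level[succ], out)
              if new != level[succ]:
                  level[succ] = new
                  work.append(succ)

      print("exec_sql reached with level:",
            "UNSAFE" if level["exec_sql"] == UNSAFE else "SAFE")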
  93. [93]M. Ceccarelli and G. Antoniol, “A Deformable Grid-Matching Approach for Microarray Images,” IEEE Transactions on Image Processing, vol. 15, no. 10, pp. 3178–3188, 2006.
    Bibtex
      @article{01703603,
      author = {Ceccarelli, Michele and Antoniol, Giuliano},
      title = {A Deformable Grid-Matching Approach for Microarray Images},
      journal = {IEEE Transactions on Image Processing},
      volume = {15},
      number = {10},
      year = {2006},
      pages = {3178-3188},
      ee = {http://dx.doi.org/10.1109/TIP.2006.877488},
      bibsource = {DBLP, http://dblp.uni-trier.de},
      pdf = {2006/01703603.pdf}
    }
    
      
    Abstract A fundamental step of microarray image analysis is the detection of the grid structure for the accurate location of each spot, representing the state of a given gene in a particular experimental condition. This step is known as gridding and belongs to the class of deformable grid matching problems which are well known in literature. Most of the available microarray gridding approaches require human intervention; for example, to specify landmarks, some points in the spot grid, or even to precisely locate individual spots. Automating this part of the process can allow high throughput analysis. This paper focuses on the development of a fully automated procedure for the problem of automatic microarray gridding. It is grounded on the Bayesian paradigm and on image analysis techniques. The procedure has two main steps. The first step, based on the Radon transform, is aimed at generating a grid hypothesis; the second step accounts for local grid deformations. The accuracy and properties of the procedure are quantitatively assessed over a set of synthetic and real images; the results are compared with well-known methods available from the literature.
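    A small sketch of the first (grid hypothesis) step only, assuming scikit-image is available; the synthetic spot image is invented and this is not the paper's Bayesian procedure. Projecting the image with the Radon transform turns a regular spot grid into periodic peaks whose spacing gives the grid pitch.
      import numpy as np
      from skimage.transform import radon

      # Synthetic microarray-like image: bright spots every 16 pixels.
      img = np.zeros((128, 128))
      img[16::16, 16::16] = 1.0

      sinogram = radon(img, theta=[0.0, 90.0], circle=False)
      profile = sinogram[:, 0]                       # projection at 0 degrees
      peaks = np.where(profile > 0.5 * profile.max())[0]
      spacing = np.diff(peaks)
      print("peak spacing along one axis:", spacing[spacing > 2])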
  94. [94]G. Antoniol and Y.-G. Guéhéneuc, “Feature Identification: An Epidemiological Metaphor,” IEEE Trans. Software Eng., vol. 32, no. 9, pp. 627–641, 2006.
    Bibtex
      @article{01707664,
      author = {Antoniol, Giuliano and Gu{\'e}h{\'e}neuc, Yann-Ga{\"e}l},
      title = {Feature Identification: An Epidemiological Metaphor},
      journal = {IEEE Trans. Software Eng.},
      volume = {32},
      number = {9},
      year = {2006},
      pages = {627-641},
      ee = {http://doi.ieeecomputersociety.org/10.1109/TSE.2006.88},
      bibsource = {DBLP, http://dblp.uni-trier.de},
      pdf = {2006/01707664.pdf}
    }
    
      
    Abstract Feature identification is a technique to identify the source code constructs activated when exercising one of the features of a program. We propose new statistical analyses of static and dynamic data to accurately identify features in large multithreaded object-oriented programs. We draw inspiration from epidemiology to improve previous approaches to feature identification and develop an epidemiological metaphor. We build our metaphor on our previous approach to feature identification, in which we use processor emulation, knowledge-based filtering, probabilistic ranking, and metamodeling. We carry out three case studies to assess the usefulness of our metaphor, using the "save a bookmark" feature of Web browsers as an illustration. In the first case study, we compare our approach with three previous approaches (a naive approach, a concept analysis-based approach, and our previous probabilistic approach) in identifying the feature in MOZILLA, a large, real-life, multithreaded object-oriented program. In the second case study, we compare the implementation of the feature in the FIREFOX and MOZILLA Web browsers. In the third case study, we identify the same feature in two more Web browsers, Chimera (in C) and ICEBrowser (in Java), and another feature in JHOTDRAW and XFIG, to highlight the generalizability of our metaphor.
  95. [95]M. Salah, S. Mancoridis, G. Antoniol, and M. D. Penta, “Scenario-Driven Dynamic Analysis for Comprehending Large Software Systems,” in CSMR, 2006, pp. 71–80.
    Bibtex
      @inproceedings{conf/csmr/SalahMAP06,
      author = {Salah, Maher and Mancoridis, Spiros and Antoniol, Giuliano and Penta, Massimiliano Di},
      title = {Scenario-Driven Dynamic Analysis for Comprehending Large Software Systems},
      booktitle = {CSMR},
      year = {2006},
      pages = {71-80},
      ee = {http://dx.doi.org/10.1109/CSMR.2006.47, http://doi.ieeecomputersociety.org/10.1109/CSMR.2006.47},
      crossref = {DBLP:conf/csmr/2006},
      bibsource = {DBLP, http://dblp.uni-trier.de}
    }
    
      
    Abstract Understanding large software systems is simplified when a combination of techniques for static and dynamic analysis is employed. Effective dynamic analysis requires that execution traces be generated by executing scenarios that are representative of the system’s typical usage. This paper presents an approach that uses dynamic analysis to extract views of a software system at different levels, namely (1) use case views, (2) module interaction views, and (3) class interaction views. The proposed views can be used to help maintainers locate features to be changed. The proposed approach is evaluated against a large software system, the Mozilla Web browser.
  96. [96]S. Bouktif, H. A. Sahraoui, and G. Antoniol, “Simulated annealing for improving software quality prediction,” in GECCO, 2006, pp. 1893–1900.
    Bibtex
      @inproceedings{p1893-bouktif,
      author = {Bouktif, Salah and Sahraoui, Houari A. and Antoniol, Giuliano},
      title = {Simulated annealing for improving software quality prediction},
      booktitle = {GECCO},
      year = {2006},
      pages = {1893-1900},
      ee = {http://doi.acm.org/10.1145/1143997.1144313},
      crossref = {DBLP:conf/gecco/2006},
      bibsource = {DBLP, http://dblp.uni-trier.de},
      pdf = {2006/p1893-bouktif.pdf}
    }
    
      
    Abstract In this paper, we propose an approach for the combination and adaptation of software quality predictive models. Quality models are decomposed into sets of expertise. The approach can be seen as a search for a valuable set of expertise that, when combined, forms a model with an optimal predictive accuracy. Since, in general, there will be several experts available and each expert will provide his expertise, the problem can be reformulated as an optimization and search problem in a large space of solutions. We present how the general problem of combining quality experts, modeled as Bayesian classifiers, can be tackled via a simulated annealing algorithm customization. The general approach was applied to build an expert predicting object-oriented software stability, a facet of software quality. Our findings demonstrate that, on the available data, the composed expert's predictive accuracy outperforms the best available expert.
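    A toy simulated-annealing sketch of the search idea only: it selects a subset of "expertise" items maximizing an invented accuracy function, whereas the paper evaluates combinations of Bayesian classifiers on real data.
      import math
      import random

      random.seed(1)
      gain = [random.uniform(0.0, 0.02) for _ in range(20)]  # assumed contributions

      def accuracy(selection):
          base = 0.6 + sum(g for g, s in zip(gain, selection) if s)
          return base - 0.01 * max(0, sum(selection) - 8)    # crude size penalty

      def anneal(steps=5000, t0=1.0):
          current = [random.random() < 0.5 for _ in range(20)]
          best = current[:]
          for k in range(steps):
              t = t0 * (1 - k / steps) + 1e-6                # cooling schedule
              cand = current[:]
              i = random.randrange(len(cand))
              cand[i] = not cand[i]                          # flip one expertise
              delta = accuracy(cand) - accuracy(current)
              if delta > 0 or random.random() < math.exp(delta / t):
                  current = cand
                  if accuracy(current) > accuracy(best):
                      best = current[:]
          return best, accuracy(best)

      print(round(anneal()[1], 3))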
  97. [97]S. Bouktif, G. Antoniol, and E. Merlo, “A Feedback Based Quality Assessment to Support Open Source Software Evolution: the GRASS Case Study,” in ICSM, 2006, pp. 155–165.
    Bibtex
      @inproceedings{04021333,
      author = {Bouktif, Salah and Antoniol, Giuliano and Merlo, Ettore},
      title = {A Feedback Based Quality Assessment to Support Open Source Software Evolution: the GRASS Case Study},
      booktitle = {ICSM},
      year = {2006},
      pages = {155-165},
      ee = {http://doi.ieeecomputersociety.org/10.1109/ICSM.2006.5},
      crossref = {DBLP:conf/icsm/2006},
      bibsource = {DBLP, http://dblp.uni-trier.de},
      pdf = {2006/04021333.pdf}
    }
    
      
    Abstract Managing software evolution for large open source software is a major challenge. Some factors that make software hard to maintain are geographically distributed development teams, frequent and rapid turnover of volunteers, absence of formal means, and lack of documentation and explicit project planning. In this paper we propose remote and continuous analysis of open source software to monitor evolution using available resources such as the CVS code repository, commit log files and exchanged mail. Evolution monitoring relies on three principal services. The first service analyzes and monitors the increase in complexity and the decline in quality; the second supports distributed developers by sending them a feedback report after each contribution; the third allows developers to gain insight into the "big picture" of the software by providing a dashboard of project evolution. Besides the description of the provided services, the paper presents a prototype environment for continuous analysis of the evolution of GRASS, an open source software system.
  98. [98]G. Antoniol, J. Krinke, and P. Tonella, “Special issue on Source code analysis and manipulation,” Sci. Comput. Program., vol. 62, no. 3, pp. 205–208, 2006.
    Bibtex
      @article{journals/scp/AntoniolKT06,
      author = {Antoniol, Giuliano and Krinke, Jens and Tonella, Paolo},
      title = {Special issue on Source code analysis and manipulation},
      journal = {Sci. Comput. Program.},
      volume = {62},
      number = {3},
      year = {2006},
      pages = {205-208},
      ee = {http://dx.doi.org/10.1016/j.scico.2006.04.009},
      bibsource = {DBLP, http://dblp.uni-trier.de}
    }
    
      
  99. [99]D. Poshyvanyk, A. Marcus, V. Rajlich, Y.-G. Guéhéneuc, and G. Antoniol, “Combining Probabilistic Ranking and Latent Semantic Indexing for Feature Identification,” in ICPC, 2006, pp. 137–148.
    Bibtex
      @inproceedings{01631116,
      author = {Poshyvanyk, Denys and Marcus, Andrian and Rajlich, V{\'a}clav and Gu{\'e}h{\'e}neuc, Yann-Ga{\"e}l and Antoniol, Giuliano},
      title = {Combining Probabilistic Ranking and Latent Semantic Indexing for Feature Identification},
      booktitle = {ICPC},
      year = {2006},
      pages = {137-148},
      ee = {http://doi.ieeecomputersociety.org/10.1109/ICPC.2006.17},
      crossref = {DBLP:conf/iwpc/2006},
      bibsource = {DBLP, http://dblp.uni-trier.de},
      pdf = {2006/01631116.pdf}
    }
    
      
    Abstract The paper recasts the problem of feature location in source code as a decision-making problem in the presence of uncertainty. The main contribution consists in the combination of two existing techniques for feature location in source code. Both techniques provide a set of ranked facts from the software as a result for the feature identification problem. One of the techniques is based on a Scenario Based Probabilistic ranking of events observed while executing a program under given scenarios. The other technique is defined as an information retrieval task, based on the Latent Semantic Indexing of the source code. We show the viability and effectiveness of the combined technique with two case studies. The first case study is a replication of feature identification in Mozilla, which allows us to directly compare the results with previously published data. The other case study is a bug location problem in Mozilla. The results show that the combined technique improves feature identification significantly with respect to each technique used independently.
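    A toy sketch of how the two "experts" could be combined, assuming scikit-learn: an LSI (truncated SVD over TF-IDF) similarity score per method is averaged with a made-up scenario-based probability. The corpus, scores and 50/50 weighting are illustrative assumptions, not the paper's setup.
      import numpy as np
      from sklearn.feature_extraction.text import TfidfVectorizer
      from sklearn.decomposition import TruncatedSVD
      from sklearn.metrics.pairwise import cosine_similarity

      # One invented "document" per method (identifiers and comments).
      methods = [
          "save bookmark add url to bookmark list",
          "render html page layout",
          "open file dialog choose path",
          "bookmark toolbar update icon",
      ]
      query = ["save a bookmark"]

      X = TfidfVectorizer().fit_transform(methods + query)
      lsi = TruncatedSVD(n_components=2).fit_transform(X)
      ir_scores = cosine_similarity(lsi[-1:], lsi[:-1]).ravel()  # IR expert

      # Made-up scenario-based probabilities that each method is feature-relevant.
      dyn_scores = np.array([0.9, 0.1, 0.2, 0.7])

      combined = 0.5 * ir_scores + 0.5 * dyn_scores
      print(np.argsort(combined)[::-1])   # methods ranked for the feature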
  100. [100]G. Antoniol, V. F. Rollo, and G. Venturi, “Linear predictive coding and cepstrum coefficients for mining time variant information from software repositories,” ACM SIGSOFT Software Engineering Notes, vol. 30, no. 4, pp. 1–5, 2005.
    Bibtex
      @article{p14-antoniol,
      author = {Antoniol, Giuliano and Rollo, Vincenzo Fabio and Venturi, Gabriele},
      title = {Linear predictive coding and cepstrum coefficients for mining time variant information from software repositories},
      journal = {ACM SIGSOFT Software Engineering Notes},
      volume = {30},
      number = {4},
      year = {2005},
      pages = {1-5},
      ee = {http://doi.acm.org/10.1145/1082983.1083156},
      bibsource = {DBLP, http://dblp.uni-trier.de},
      pdf = {2005/p14-antoniol.pdf}
    }
    
      
    Abstract This paper presents an approach to recover time variant information from software repositories. It is widely accepted that software evolves due to factors such as defect removal, market opportunity or adding new features. Software evolution details are stored in software repositories which often contain the change history. On the other hand there is a lack of approaches, technologies and methods to efficiently extract and represent time dependent information. Disciplines such as signal and image processing or speech recognition adopt frequency domain representations to mitigate differences of signals evolving in time. Inspired by time-frequency duality, this paper proposes the use of Linear Predictive Coding (LPC) and Cepstrum coefficients to model time varying software artifact histories. LPC or Cepstrum allow obtaining very compact representations with linear complexity. These representations can be used to highlight components and artifacts that evolved in the same way or with very similar evolution patterns. To assess the proposed approach we applied LPC and Cepstral analysis to 211 Linux kernel releases (i.e., from 1.0 to 1.3.100), to identify files with very similar size histories. The approach, the preliminary results and the lessons learned are presented in this paper.
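    A compact sketch of the LPC ingredient only, not the paper's full analysis: a least-squares linear predictor fitted to an invented file-size history yields a fixed-length coefficient vector, and files with similar histories end up with nearby vectors.
      import numpy as np

      def lpc(signal, order=4):
          """Fit coefficients a so that x[t] ~ sum_k a[k] * x[t-k]."""
          x = np.asarray(signal, dtype=float)
          rows = [x[t - order:t][::-1] for t in range(order, len(x))]
          coeffs, *_ = np.linalg.lstsq(np.array(rows), x[order:], rcond=None)
          return coeffs

      # Invented per-release size history (KLOC) of one file.
      history = [10, 10, 11, 12, 12, 13, 15, 15, 16, 18, 19, 21]
      print(lpc(history))   # compact signature; compare histories by vector distance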
  101. [101]J. I. Maletic, G. Antoniol, J. Cleland-Huang, and J. H. Hayes, “3rd international workshop on traceability in emerging forms of software engineering (TEFSE 2005),” in ASE, 2005, p. 462.
    Bibtex
      @inproceedings{conf/kbse/MaleticACH05,
      author = {Maletic, Jonathan I. and Antoniol, Giuliano and Cleland-Huang, Jane and Hayes, Jane Huffman},
      title = {3rd international workshop on traceability in emerging forms of software engineering (TEFSE 2005)},
      booktitle = {ASE},
      year = {2005},
      pages = {462},
      ee = {http://doi.acm.org/10.1145/1101908.1102002},
      crossref = {DBLP:conf/kbse/2005},
      bibsource = {DBLP, http://dblp.uni-trier.de}
    }
    
      
  102. [102]M. Salah, S. Mancoridis, G. Antoniol, and M. D. Penta, “Towards Employing Use-Cases and Dynamic Analysis to Comprehend Mozilla,” in ICSM, 2005, pp. 639–642.
    Bibtex
      @inproceedings{01510163,
      author = {Salah, Maher and Mancoridis, Spiros and Antoniol, Giuliano and Penta, Massimiliano Di},
      title = {Towards Employing Use-Cases and Dynamic Analysis to Comprehend Mozilla},
      booktitle = {ICSM},
      year = {2005},
      pages = {639-642},
      ee = {http://doi.ieeecomputersociety.org/10.1109/ICSM.2005.94},
      crossref = {DBLP:conf/icsm/2005},
      bibsource = {DBLP, http://dblp.uni-trier.de},
      pdf = {2005/01510163.pdf}
    }
    
      
    Abstract This paper presents an approach for comprehending large software systems using views that are created by subjecting the software systems to dynamic analysis under various use-case scenarios. Two sets of views are built from the runtime data: (1) graphs that capture the parts of the software’s architecture that pertain to the use-cases; and (2) metrics that measure the intricacy of the software and the similarity between the software’s use-cases. The Mozilla Web browser was chosen as the subject software system in our case study due to its size, intricacy, and ability to expose the challenges of analyzing large systems.
  103. [103]G. Antoniol, M. Ceccarelli, and A. Petrosino, “Microarray image addressing based on the Radon transform,” in ICIP (1), 2005, pp. 13–16.
    Bibtex
      @inproceedings{01529675,
      author = {Antoniol, Giuliano and Ceccarelli, Michele and Petrosino, Alfredo},
      title = {Microarray image addressing based on the Radon transform},
      booktitle = {ICIP (1)},
      year = {2005},
      pages = {13-16},
      ee = {http://dx.doi.org/10.1109/ICIP.2005.1529675},
      crossref = {DBLP:conf/icip/2005},
      bibsource = {DBLP, http://dblp.uni-trier.de},
      pdf = {2005/01529675.pdf}
    }
    
      
    Abstract A fundamental step of microarray image analysis is the detection of the grid structure for the accurate localization of each spot, representing the state of a given gene in a particular experimental condition. This step is known as gridding or microarray addressing. Most of the available microarray gridding approaches require human intervention; for example, to specify landmarks, some points in the spot grid, or even to precisely locate individual spots. Automating this part of the process can allow high throughput analysis (Yang, Y, et al, 2002). This paper is aimed at the development of fully automated procedures for the problem of automatic microarray gridding. Indeed, many of the automatic gridding approaches are based on two phases, the first aimed at the generation of a hypothesis consisting of a regular interpolating grid, whereas the second performs an adaptation of the hypothesis. Here we show that the first step can efficiently be accomplished by using the Radon transform, whereas the second step can be modeled by an iterative posterior maximization procedure (Antoniol, G and Ceccarelli, M, 2004).
  104. [104]C. D. Grosso, G. Antoniol, M. D. Penta, P. Galinier, and E. Merlo, “Improving network applications security: a new heuristic to generate stress testing data,” in GECCO, 2005, pp. 1037–1043.
    Bibtex
      @inproceedings{GrossoAPGM05,
      author = {Grosso, Concettina Del and Antoniol, Giuliano and Penta, Massimiliano Di and Galinier, Philippe and Merlo, Ettore},
      title = {Improving network applications security: a new heuristic to generate stress testing data},
      booktitle = {GECCO},
      year = {2005},
      pages = {1037-1043},
      ee = {http://doi.acm.org/10.1145/1068009.1068185},
      crossref = {DBLP:conf/gecco/2005},
      bibsource = {DBLP, http://dblp.uni-trier.de}
    }
    
      
    Abstract Buffer overflows cause serious problems in different categories of software systems. For example, if present in network or security applications, they can be exploited to gain unauthorized privileges or access to the system. In embedded systems, such as avionics or automotive systems, they can be the cause of serious accidents. This paper proposes to combine static analysis and program slicing with evolutionary testing, to detect buffer overflow threats. Static analysis identifies vulnerable statements, while slicing and data dependency analysis identify the relationship between these statements and program or function inputs, thus reducing the search space. To guide the search towards discovering buffer overflows, in this work we define three multi-objective fitness functions and compare them on two open-source systems. These functions account for terms such as statement coverage, the coverage of vulnerable statements, the distance from buffer boundaries and the coverage of unconstrained nodes of the control flow graph.
  105. [105]G. Antoniol, V. F. Rollo, and G. Venturi, “Detecting groups of co-changing files in CVS repositories,” in IWPSE, 2005, pp. 23–32.
    Bibtex
      @inproceedings{conf/iwpse/AntoniolRV05,
      author = {Antoniol, Giuliano and Rollo, Vincenzo Fabio and Venturi, Gabriele},
      title = {Detecting groups of co-changing files in CVS repositories},
      booktitle = {IWPSE},
      year = {2005},
      pages = {23-32},
      ee = {http://doi.ieeecomputersociety.org/10.1109/IWPSE.2005.11},
      crossref = {DBLP:conf/iwpse/2005},
      bibsource = {DBLP, http://dblp.uni-trier.de}
    }
    
      
  106. [106]M. D. Penta, M. Neteler, G. Antoniol, and E. Merlo, “A language-independent software renovation framework,” Journal of Systems and Software, vol. 77, no. 3, pp. 225–240, 2005.
    Bibtex
      @article{journals/jss/PentaNAM05,
      author = {Penta, Massimiliano Di and Neteler, Markus and Antoniol, Giuliano and Merlo, Ettore},
      title = {A language-independent software renovation framework},
      journal = {Journal of Systems and Software},
      volume = {77},
      number = {3},
      year = {2005},
      pages = {225-240},
      ee = {http://dx.doi.org/10.1016/j.jss.2004.03.033},
      bibsource = {DBLP, http://dblp.uni-trier.de}
    }
    
      
  107. [107]G. Antoniol and Y.-G. Guéhéneuc, “Feature Identification: A Novel Approach and a Case Study,” in ICSM, 2005, pp. 357–366.
    Bibtex
      @inproceedings{conf/icsm/AntoniolG05,
      author = {Antoniol, Giuliano and Gu{\'e}h{\'e}neuc, Yann-Ga{\"e}l},
      title = {Feature Identification: A Novel Approach and a Case Study},
      booktitle = {ICSM},
      year = {2005},
      pages = {357-366},
      ee = {http://doi.ieeecomputersociety.org/10.1109/ICSM.2005.48},
      crossref = {DBLP:conf/icsm/2005},
      bibsource = {DBLP, http://dblp.uni-trier.de}
    }
    
      
    Abstract Feature identification is a well-known technique to identify subsets of a program source code activated when exercising a functionality. Several approaches have been proposed to identify features. We present an approach to feature identification and comparison for large object-oriented multi-threaded programs using both static and dynamic data. We use processor emulation, knowledge filtering, and probabilistic ranking to overcome the difficulties of collecting dynamic data, i.e., imprecision and noise. We use model transformations to compare and to visualise identified features. We compare our approach with a naive approach and a concept analysis-based approach using a case study on a real-life large object-oriented multi-threaded program, Mozilla, to show the advantages of our approach. We also use the case study to compare processor emulation with statistical profiling.
  108. [108]G. Antoniol, M. D. Penta, and M. Harman, “Search-Based Techniques Applied to Optimization of Project Planning for a Massive Maintenance Project,” in ICSM, 2005, pp. 240–249.
    Bibtex
      @inproceedings{conf/icsm/AntoniolPH05,
      author = {Antoniol, Giuliano and Penta, Massimiliano Di and Harman, Mark},
      title = {Search-Based Techniques Applied to Optimization of Project Planning for a Massive Maintenance Project},
      booktitle = {ICSM},
      year = {2005},
      pages = {240-249},
      ee = {http://doi.ieeecomputersociety.org/10.1109/ICSM.2005.79},
      crossref = {DBLP:conf/icsm/2005},
      bibsource = {DBLP, http://dblp.uni-trier.de}
    }
    
      
  109. [109]Y.-G. Guéhéneuc and G. Antoniol, “Report on the 1st International Workshop on Design Pattern Theory and Practice,” in STEP, 2005, pp. 193–195.
    Bibtex
      @inproceedings{conf/step/GueheneucA05,
      author = {Gu{\'e}h{\'e}neuc, Yann-Ga{\"e}l and Antoniol, Giuliano},
      title = {Report on the 1st International Workshop on Design Pattern Theory and Practice},
      booktitle = {STEP},
      year = {2005},
      pages = {193-195},
      ee = {http://doi.ieeecomputersociety.org/10.1109/STEP.2005.20},
      crossref = {DBLP:conf/step/2005},
      bibsource = {DBLP, http://dblp.uni-trier.de}
    }
    
      
  110. [110]G. Antoniol, M. D. Penta, H. Gall, and M. Pinzger, “Towards the Integration of Versioning Systems, Bug Reports and Source Code Meta-Models,” Electr. Notes Theor. Comput. Sci., vol. 127, no. 3, pp. 87–99, 2005.
    Bibtex
      @article{journals/entcs/AntoniolPGP05,
      author = {Antoniol, Giuliano and Penta, Massimiliano Di and Gall, Harald and Pinzger, Martin},
      title = {Towards the Integration of Versioning Systems, Bug Reports and Source Code Meta-Models},
      journal = {Electr. Notes Theor. Comput. Sci.},
      volume = {127},
      number = {3},
      year = {2005},
      pages = {87-99},
      ee = {http://dx.doi.org/10.1016/j.entcs.2004.08.036},
      bibsource = {DBLP, http://dblp.uni-trier.de}
    }
    
      
  111. [111]G. Antoniol, M. Ceccarelli, P. Petrillo, and A. Petrosino, “An ICA Approach to Unsupervised Change Detection in Multispectral Images,” in WIRN, 2004, pp. 299–311.
    Bibtex
      @inproceedings{chp3A1010072F140203432635,
      author = {Antoniol, Giuliano and Ceccarelli, Michele and Petrillo, P. and Petrosino, Alfredo},
      title = {An ICA Approach to Unsupervised Change Detection in Multispectral Images},
      booktitle = {WIRN},
      year = {2004},
      pages = {299-311},
      ee = {http://dx.doi.org/10.1007/1-4020-3432-6_35},
      crossref = {DBLP:conf/wirn/2004},
      bibsource = {DBLP, http://dblp.uni-trier.de},
      pdf = {2004/chp3A1010072F140203432635.pdf}
    }
    
      
    Abstract Detecting regions of change in multiple images of the same scene taken at different times is of widespread interest due to a large number of applications in diverse disciplines, including remote sensing, surveillance, medical diagnosis and treatment, civil infrastructure, and underwater sensing. The paper proposes a data dependent change detection approach based on textural features extracted by the Independent Component Analysis (ICA) model. The properties of ICA allow the creation of energy features for computing multispectral and multitemporal difference images to be classified. Our experiments on remote sensing images show that the proposed method can efficiently and effectively classify temporal discontinuities corresponding to changed areas over the observed scenes.
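    A small sketch loosely inspired by the ICA idea above, not the paper's pipeline: FastICA is applied to per-pixel band differences of two synthetic acquisitions, and pixels with extreme component values are flagged as changed (the 3-sigma threshold is purely illustrative).
      import numpy as np
      from sklearn.decomposition import FastICA

      # Two co-registered "acquisitions" (bands x pixels); the second differs
      # from the first only in a small changed region plus noise.
      rng = np.random.default_rng(0)
      t1 = rng.normal(size=(3, 1000))
      t2 = t1 + rng.normal(scale=0.1, size=t1.shape)
      t2[:, 400:420] += 3.0                  # the "changed" pixels

      diff = (t2 - t1).T                     # pixels x bands
      comps = FastICA(n_components=2, random_state=0,
                      max_iter=1000).fit_transform(diff)
      score = np.abs(comps).max(axis=1)
      changed = score > 3 * score.std()
      print(int(changed.sum()), "pixels flagged as changed")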
  112. [112]G. Antoniol, S. Gradara, and G. Venturi, “Methodological issues in a CMM Level 4 implementation,” Software Process: Improvement and Practice, vol. 9, no. 1, pp. 33–50, 2004.
    Bibtex
      @article{journals/sopr/AntoniolGV04,
      author = {Antoniol, Giuliano and Gradara, Sara and Venturi, Gabriele},
      title = {Methodological issues in a CMM Level 4 implementation},
      journal = {Software Process: Improvement and Practice},
      volume = {9},
      number = {1},
      year = {2004},
      pages = {33-50},
      ee = {http://dx.doi.org/10.1002/spip.183},
      bibsource = {DBLP, http://dblp.uni-trier.de}
    }
    
      
    Abstract The Capability Maturity Model (CMM) developed by the Software Engineering Institute is an improvement paradigm. It provides a framework for assessing the maturity of software processes on a five level scale, and guidelines which help to improve software process and artifact quality. Moving towards CMM Level 4 and Level 5 is a very demanding task even for large software companies already accustomed to the CMM and ISO certifications. It requires, for example, quality monitoring, control, feedback, and process optimization. In fact, going beyond CMM Level 3 requires a radical change in the way projects are carried out and managed. It involves quantitative and statistical techniques to control software processes and quality, and it entails substantial changes in the way the organization approaches software life cycle activities. In this paper we describe the process changes, adaptation, integration and tailoring, and we report lessons learned while preparing an Italian solution centre of EDS for the Level 4 internal assessment. The solution centre has about 350 people and carries out about 40 software development and maintenance projects each year. We describe how Level 4 Key Process Areas have been implemented by building a methodological framework which leverages both existing available methodologies and practices already in place (e.g., derived from ISO compliance). We discuss how methodologies have been adapted to the company’s internal and external situation and what the underlying assumptions for the methodology adaptation are. Furthermore we discuss the cultural and organizational changes required to obtain a CMM Level 4 certification. The steps we followed, the process improvements we carried out, and the challenges we faced were most likely those with the highest risk and cost driving factors, common to all organizations aiming at achieving CMM Level 4.
  113. [113]G. Antoniol, M. D. Penta, G. Masone, and U. Villano, “Compiler Hacking for Source Code Analysis,” Software Quality Journal, vol. 12, no. 4, pp. 383–406, 2004.
    Bibtex
      @article{journals/sqj/AntoniolPMV04,
      author = {Antoniol, Giuliano and Penta, Massimiliano Di and Masone, Gianluca and Villano, Umberto},
      title = {Compiler Hacking for Source Code Analysis},
      journal = {Software Quality Journal},
      volume = {12},
      number = {4},
      year = {2004},
      pages = {383-406},
      ee = {http://dx.doi.org/10.1023/B:SQJO.0000039794.29432.7e},
      bibsource = {DBLP, http://dblp.uni-trier.de}
    }
    
      
    Abstract Many activities related to software quality assessment and improvement, such as empirical model construction, data flow analysis, testing or reengineering, rely on static source code analysis as the first and fundamental step for gathering the necessary input information. In the past, two different strategies have been adopted to develop tool suites. There are tools encompassing or implementing the source parse step, where the parser is internal to the toolkit, and is developed and maintained with it. A different approach builds tools on top of external, already-available components such as compilers that output the program abstract syntax tree, or that make it available via an API. This paper discusses techniques, issues and challenges linked to compiler patching or wrapping for analysis purposes. In particular, different approaches for accessing the compiler parsing information are compared, and the techniques used to decouple the parsing front end from the analysis modules are discussed. Moreover, the paper presents an approach and a tool, XOgastan, developed exploiting the gcc/g++ ability to save a representation of the intermediate abstract syntax tree. XOgastan translates the gcc/g++ dumped abstract syntax tree format into a Graph eXchange Language representation, which makes it possible to take advantage of currently available XML tools for any subsequent analysis step. The tool is illustrated and its design discussed, showing its architecture and the main implementation choices made.
  114. [114]G. Antoniol, A. Cimitile, G. A. D. Lucca, and M. D. Penta, “Assessing Staffing Needs for a Software Maintenance Project through Queuing Simulation,” IEEE Trans. Software Eng., vol. 30, no. 1, pp. 43–58, 2004.
    Bibtex
      @article{01265735,
      author = {Antoniol, Giuliano and Cimitile, Aniello and Lucca, Giuseppe A. Di and Penta, Massimiliano Di},
      title = {Assessing Staffing Needs for a Software Maintenance Project through Queuing Simulation},
      journal = {IEEE Trans. Software Eng.},
      volume = {30},
      number = {1},
      year = {2004},
      pages = {43-58},
      ee = {http://doi.ieeecomputersociety.org/10.1109/TSE.2004.1265735},
      bibsource = {DBLP, http://dblp.uni-trier.de},
      pdf = {2004/01265735.pdf}
    }
    
      
    Abstract We present an approach based on queuing theory and stochastic simulation to help planning, managing, and controlling the project staffing and the resulting service level in distributed multiphase maintenance processes. Data from a Y2K massive maintenance intervention on a large COBOL/JCL financial software system were used to simulate and study different service center configurations for a geographically distributed software maintenance project. In particular, a monolithic configuration corresponding to the customer's point of view and more fine-grained configurations, accounting for different process phases as well as for rework, were studied. The queuing theory and stochastic simulation provided a means to assess staffing, evaluate service level, and assess the likelihood of meeting the project deadline while executing the project. It turned out to be an effective staffing tool for managers, provided that it is complemented with other project-management tools, in order to prioritize activities, avoid conflicts, and check the availability of resources.
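    As an illustration of the queuing idea (not the authors' calibrated model), the following Python sketch simulates a single service center with exponential arrivals and service times and compares staffing levels by the resulting mean time in system; all rates, team sizes and names are invented.
      import random

      def simulate_center(servers, arrival_rate, service_rate, n_requests, seed=1):
          """Toy M/M/c-style simulation: exponential arrivals and service times,
          requests handled in arrival order; returns the mean time in system."""
          random.seed(seed)
          clock = 0.0
          free_at = [0.0] * servers            # time at which each maintainer becomes free
          total_time_in_system = 0.0
          for _ in range(n_requests):
              clock += random.expovariate(arrival_rate)            # next change request arrives
              k = min(range(servers), key=lambda i: free_at[i])    # earliest available maintainer
              start = max(clock, free_at[k])                       # wait if everybody is busy
              free_at[k] = start + random.expovariate(service_rate)
              total_time_in_system += free_at[k] - clock
          return total_time_in_system / n_requests

      # compare staffing levels for the same workload
      for team_size in (3, 4, 5):
          print(team_size, round(simulate_center(team_size, arrival_rate=1.0,
                                                 service_rate=0.4, n_requests=20000), 2))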
  115. [115]G. Antoniol and M. Ceccarelli, “A Markov Random Field Approach to Microarray Image Gridding,” in ICPR (3), 2004, pp. 550–553.
    Bibtex
      @inproceedings{01334588,
      author = {Antoniol, Giuliano and Ceccarelli, Michele},
      title = {A Markov Random Field Approach to Microarray Image Gridding},
      booktitle = {ICPR (3)},
      year = {2004},
      pages = {550-553},
      ee = {http://doi.ieeecomputersociety.org/10.1109/ICPR.2004.50},
      bibsource = {DBLP, http://dblp.uni-trier.de},
      pdf = {2004/01334588.pdf}
    }
    
      
    Abstract This paper reports a novel approach for the problem of automatic gridding in microarray images. The solution is modeled as a Bayesian random field with a Gibbs prior possibly containing first-order cliques (1-clique). In contrast with previously published contributions, this paper does not assume second-order cliques; instead, it relies on a two-step procedure to locate microarray spots. First, a set of guide spots is used to interpolate a reference grid. The final grid is then produced by an a-posteriori maximization, which takes into account the reference rectangular grid and local deformations. The algorithm is completely automatic and no human intervention is required, the only critical parameter being the range of the radius of the guide spots.
  116. [116]G. Antoniol, M. D. Penta, and M. Harman, “Search-Based Techniques for Optimizing Software Project Resource Allocation,” in GECCO (2), 2004, pp. 1425–1426.
    Bibtex
      @inproceedings{conf/gecco/AntoniolPH04,
      author = {Antoniol, Giuliano and Penta, Massimiliano Di and Harman, Mark},
      title = {Search-Based Techniques for Optimizing Software Project Resource Allocation},
      booktitle = {GECCO (2)},
      year = {2004},
      pages = {1425-1426},
      ee = {http://dx.doi.org/10.1007/978-3-540-24855-2_162},
      crossref = {DBLP:conf/gecco/2004-2},
      bibsource = {DBLP, http://dblp.uni-trier.de}
    }
    
      
    Abstract We present a search-based approach for planning resource allocation in large software projects, which aims to find an optimal or near-optimal order in which to allocate work packages to programming teams, in order to minimize the project duration. The approach is validated by an empirical study of a large, commercial Y2K massive maintenance project, comparing random scheduling, hill climbing, simulated annealing and genetic algorithms, applied to two different problem encodings. Results show that a genome encoding the work package ordering, and a fitness function obtained by queuing simulation, constitute the best choice, both in terms of quality of results and number of fitness evaluations required to achieve them.
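    A minimal sketch of the search formulation, with the queuing-simulation fitness of the paper replaced by a greedy makespan estimate for brevity: a hill climber searches over work-package orderings by swapping two packages at a time. Durations, team counts and names are illustrative.
      import random

      def makespan(order, durations, teams):
          """Greedy list scheduling: assign each work package, in the given order,
          to the team that becomes free first; return the project duration."""
          free_at = [0.0] * teams
          for wp in order:
              k = min(range(teams), key=lambda i: free_at[i])
              free_at[k] += durations[wp]
          return max(free_at)

      def hill_climb(durations, teams, iterations=5000, seed=0):
          """Search over work-package orderings; the neighbourhood is a swap of two packages."""
          random.seed(seed)
          order = list(range(len(durations)))
          random.shuffle(order)
          best = makespan(order, durations, teams)
          for _ in range(iterations):
              i, j = random.sample(range(len(order)), 2)
              order[i], order[j] = order[j], order[i]
              cost = makespan(order, durations, teams)
              if cost <= best:
                  best = cost
              else:
                  order[i], order[j] = order[j], order[i]   # undo the unhelpful swap
          return best, order

      durations = [random.uniform(1, 10) for _ in range(30)]    # invented work-package sizes
      print(round(hill_climb(durations, teams=4)[0], 2))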
  117. [117]G. Antoniol, M. D. Penta, and M. Zazzara, “Understanding Web Applications through Dynamic Analysis,” in IWPC, 2004, pp. 120–131.
    Bibtex
      @inproceedings{conf/iwpc/AntoniolPZ04,
      author = {Antoniol, Giuliano and Penta, Massimiliano Di and Zazzara, Michele},
      title = {Understanding Web Applications through Dynamic Analysis},
      booktitle = {IWPC},
      year = {2004},
      pages = {120-131},
      ee = {http://doi.ieeecomputersociety.org/10.1109/WPC.2004.1311054},
      crossref = {DBLP:conf/iwpc/2004},
      bibsource = {DBLP, http://dblp.uni-trier.de}
    }
    
      
  118. [118]G. Antoniol, M. D. Penta, and M. Harman, “A Robust Search-Based Approach to Project Management in the Presence of Abandonment, Rework, Error and Uncertainty,” in IEEE METRICS, 2004, pp. 172–183.
    Bibtex
      @inproceedings{conf/metrics/AntoniolPH04,
      author = {Antoniol, Giuliano and Penta, Massimiliano Di and Harman, Mark},
      title = {A Robust Search-Based Approach to Project Management in the Presence of Abandonment, Rework, Error and Uncertainty},
      booktitle = {IEEE METRICS},
      year = {2004},
      pages = {172-183},
      ee = {http://doi.ieeecomputersociety.org/10.1109/METRIC.2004.1357901},
      crossref = {DBLP:conf/metrics/2004},
      bibsource = {DBLP, http://dblp.uni-trier.de}
    }
    
      
  119. [119]G. Antoniol, M. D. Penta, and E. Merlo, “An Automatic Approach to identify Class Evolution Discontinuities,” in IWPSE, 2004, pp. 31–40.
    Bibtex
      @inproceedings{01334766,
      author = {Antoniol, Giuliano and Penta, Massimiliano Di and Merlo, Ettore},
      title = {An Automatic Approach to identify Class Evolution Discontinuities},
      booktitle = {IWPSE},
      year = {2004},
      pages = {31-40},
      ee = {http://doi.ieeecomputersociety.org/10.1109/IWPSE.2004.1334766},
      crossref = {DBLP:conf/iwpse/2004},
      bibsource = {DBLP, http://dblp.uni-trier.de},
      pdf = {2004/01334766.pdf}
    }
    
      
    Abstract When a software system evolves, features are added, removed and changed. Moreover, refactoring activities are periodically performed to improve the software's internal structure. A class may be replaced by another, two classes can be merged, or a class may be split in two others. As a consequence, it may not be possible to trace software features from one release to another. When studying software evolution, we should be able to trace a class lifetime even when the class disappears because it is replaced by a similar one, split or merged. Such a capability is also essential to perform impact analysis. This work proposes an automatic approach, inspired by vector space information retrieval, to identify class evolution discontinuities and, therefore, cases of possible refactoring. The approach has been applied to identify refactorings performed over 40 releases of a Java open source domain name server. Almost all the refactorings found were actually performed in the analyzed system, thus indicating the helpfulness of the approach and of the developed tool.
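    A minimal sketch of the vector space idea, not the authors' tool: each class is represented by the bag of identifiers it contains, and a class of release N with no sufficiently similar counterpart in release N+1 is flagged as a candidate discontinuity (rename, split or merge). Tokenization, the threshold and the class names are illustrative.
      import math
      from collections import Counter

      def cosine(a, b):
          """Cosine similarity between two bags of identifiers."""
          dot = sum(a[t] * b.get(t, 0) for t in a)
          na = math.sqrt(sum(v * v for v in a.values()))
          nb = math.sqrt(sum(v * v for v in b.values()))
          return dot / (na * nb) if na and nb else 0.0

      def discontinuities(release_n, release_n1, threshold=0.6):
          """release_*: {class name: list of identifiers}; a class with no counterpart
          above the threshold in the next release is a candidate discontinuity."""
          next_vectors = [Counter(ids) for ids in release_n1.values()]
          flagged = []
          for name, ids in release_n.items():
              v = Counter(ids)
              best = max((cosine(v, w) for w in next_vectors), default=0.0)
              if best < threshold:
                  flagged.append(name)
          return flagged

      old = {"DnsResolver": ["query", "cache", "lookup", "record", "zone"],
             "AxfrHelper": ["transfer", "axfr", "socket"]}
      new = {"Resolver": ["query", "cache", "lookup", "record"],
             "ZoneStore": ["zone", "record", "store"]}
      print(discontinuities(old, new))   # AxfrHelper has no close counterpart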
  120. [120]E. Merlo, G. Antoniol, M. D. Penta, and V. F. Rollo, “Linear Complexity Object-Oriented Similarity for Clone Detection and Software Evolution Analyses,” in ICSM, 2004, pp. 412–416.
    Bibtex
      @inproceedings{conf/icsm/MerloAPR04,
      author = {Merlo, Ettore and Antoniol, Giuliano and Penta, Massimiliano Di and Rollo, Vincenzo Fabio},
      title = {Linear Complexity Object-Oriented Similarity for Clone Detection and Software Evolution Analyses},
      booktitle = {ICSM},
      year = {2004},
      pages = {412-416},
      ee = {http://doi.ieeecomputersociety.org/10.1109/ICSM.2004.1357826},
      crossref = {DBLP:conf/icsm/2004},
      bibsource = {DBLP, http://dblp.uni-trier.de}
    }
    
      
  121. [121]G. Antoniol and M. D. Penta, “A Distributed Architecture for Dynamic Analyses on User-Profile Data,” in CSMR, 2004, pp. 319–328.
    Bibtex
      @inproceedings{conf/csmr/AntoniolP04,
      author = {Antoniol, Giuliano and Penta, Massimiliano Di},
      title = {A Distributed Architecture for Dynamic Analyses on User-Profile Data},
      booktitle = {CSMR},
      year = {2004},
      pages = {319-328},
      ee = {http://dx.doi.org/10.1109/CSMR.2004.1281434, http://doi.ieeecomputersociety.org/10.1109/CSMR.2004.1281434},
      crossref = {DBLP:conf/csmr/2004},
      bibsource = {DBLP, http://dblp.uni-trier.de}
    }
    
      
    Abstract Combining static and dynamic information is highly relevant in many reverse engineering, program comprehension and maintenance tasks. Dynamic analysis is particularly effective when information is collected during a long period of time in a real user environment. This, however, poses several challenges. First and foremost, it is necessary to model the extraction of any relevant dynamic information from execution traces, thus avoiding the collection of a large amount of unmanageable data. Second, we need a distributed architecture that allows such information to be collected and compressed from geographically distributed users. We propose a probabilistic model for representing dynamic information, as well as a web-service based distributed architecture for its collection and compression. The new architecture has been instantiated to collect interprocedural program execution traces up to a selectable level of calling context sensitivity. The paper details the role and responsibilities of the architecture components, as well as performance and compression ratios achieved on a set of C and Java programs.
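    A minimal sketch of collecting execution traces with a selectable level of calling context sensitivity, assuming simple decorator-based instrumentation rather than the paper's web-service architecture: each call is recorded together with its k enclosing callers, and repeated contexts are compressed into counts. The traced functions and k are illustrative.
      from collections import Counter

      class ContextTracer:
          """Counts calling contexts truncated to depth k: a compressed
          stand-in for a full interprocedural execution trace."""
          def __init__(self, k=2):
              self.k, self.stack, self.counts = k, [], Counter()

          def enter(self, name):
              self.stack.append(name)
              self.counts[tuple(self.stack[-self.k:])] += 1

          def leave(self):
              self.stack.pop()

      tracer = ContextTracer(k=2)

      def traced(fn):
          """Decorator standing in for real instrumentation of the program under study."""
          def wrapper(*args, **kwargs):
              tracer.enter(fn.__name__)
              try:
                  return fn(*args, **kwargs)
              finally:
                  tracer.leave()
          return wrapper

      @traced
      def validate(x):
          return x > 0

      @traced
      def parse(x):
          return validate(x)

      @traced
      def main():
          for x in (-1, 2, 3):
              parse(x)

      main()
      print(dict(tracer.counts))   # e.g. {('main',): 1, ('main', 'parse'): 3, ('parse', 'validate'): 3}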
  122. [122]G. Antoniol and M. D. Penta, “Library Miniaturization Using Static and Dynamic Information,” in ICSM, 2003, p. 235-.
    Bibtex
      @inproceedings{conf/icsm/AntoniolP03,
      author = {Antoniol, Giuliano and Penta, Massimiliano Di},
      title = {Library Miniaturization Using Static and Dynamic Information},
      booktitle = {ICSM},
      year = {2003},
      pages = {235-},
      ee = {http://doi.ieeecomputersociety.org/10.1109/ICSM.2003.1235426},
      crossref = {DBLP:conf/icsm/2003},
      bibsource = {DBLP, http://dblp.uni-trier.de}
    }
    
      
  123. [123]E. Merlo, G. Antoniol, and P.-L. Brunelle, “Fast Flow Analysis to Compute Fuzzy Estimates of Risk Levels,” in CSMR, 2003, p. 351-.
    Bibtex
      @inproceedings{01192443,
      author = {Merlo, Ettore and Antoniol, Giuliano and Brunelle, Pierre-Luc},
      title = {Fast Flow Analysis to Compute Fuzzy Estimates of Risk Levels},
      booktitle = {CSMR},
      year = {2003},
      pages = {351-},
      ee = {http://dx.doi.org/10.1109/CSMR.2003.1192443, http://doi.ieeecomputersociety.org/10.1109/CSMR.2003.1192443},
      crossref = {DBLP:conf/csmr/2003},
      bibsource = {DBLP, http://dblp.uni-trier.de},
      pdf = {2003/01192443.pdf}
    }
    
      
    Abstract In the context of software quality assessment, this paper proposes original flow analyses which propagate numerical estimates of blocking risks along an inter-procedural control flow graph (CFG) and which combine these estimates along the different CFG paths using fuzzy logic operations. Two specialized analyses can be further defined in terms of definite and possible flow analysis. The definite analysis computes the minimum blocking risk levels that statements may encounter on every path, while the possible analysis computes the highest blocking risk levels encountered by statements on at least one path. This paper presents original flow equations to compute the definite and possible blocking risk levels for statements in source code. The described fix-point algorithm presents a linear execution time and memory complexity and it is also fast in practice. The experimental context used to validate the presented approach is described, and results are reported and discussed for eight publicly available systems written in C whose total size is about 300 KLOC. Results show that the analyses can be used to compute, identify, and compare definite and possible blocking risks in software systems. Furthermore, programs which are known to be synchronized, like "samba", show a relatively high level of blocking risks. On the other hand, the approach makes it possible to identify even low levels of blocking risks, such as those presented by programs like "gawk".
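    A minimal sketch of the definite/possible distinction, not the paper's flow equations: risk levels in [0, 1] attached to statements are propagated forward over a control flow graph until a fixed point, combining incoming paths with min for the definite estimate and with max for the possible one. The graph and risk values are invented.
      def propagate(cfg, local_risk, combine):
          """cfg: {node: [successor, ...]}; local_risk: {node: value in [0, 1]}.
          Incoming path estimates are merged with `combine` (min or max) and
          joined with the node's own risk by fuzzy OR (max), until a fixed point."""
          preds = {n: [] for n in cfg}
          for n, succs in cfg.items():
              for s in succs:
                  preds[s].append(n)
          est = {n: local_risk.get(n, 0.0) for n in cfg}
          changed = True
          while changed:
              changed = False
              for n in cfg:
                  if preds[n]:
                      incoming = combine(est[p] for p in preds[n])
                      new = max(local_risk.get(n, 0.0), incoming)
                      if new != est[n]:
                          est[n], changed = new, True
          return est

      cfg = {"entry": ["a", "b"], "a": ["join"], "b": ["join"], "join": []}
      risk = {"a": 0.8, "b": 0.2}            # e.g. a blocking call on branch "a"
      print(propagate(cfg, risk, min))       # definite: risk guaranteed on every path
      print(propagate(cfg, risk, max))       # possible: risk reachable on some path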
  124. [124]G. Antoniol, R. Fiutem, and C. J. Lokan, “Object-Oriented Function Points: An Empirical Validation,” Empirical Software Engineering, vol. 8, no. 3, pp. 225–254, 2003.
    Bibtex
      @article{journals/ese/AntoniolFL03,
      author = {Antoniol, Giuliano and Fiutem, Roberto and Lokan, Christopher J.},
      title = {Object-Oriented Function Points: An Empirical Validation},
      journal = {Empirical Software Engineering},
      volume = {8},
      number = {3},
      year = {2003},
      pages = {225-254},
      ee = {http://dx.doi.org/10.1023/A:1024472727275},
      bibsource = {DBLP, http://dblp.uni-trier.de}
    }
    
      
    Abstract We present an empirical validation of Object-Oriented size estimation models. In previous work we proposed Object Oriented Function Points (OOFP), an adaptation of the Function Points approach to Object-Oriented systems. In a small pilot study we used the OOFP method to estimate LOC. In this paper we extend the empirical validation of OOFP substantially, using a larger data set and comparing OOFP with alternative predictors of LOC. The aim of the paper is to gain an understanding of which factors contribute to accurate size prediction for OO software, and to position OOFP within that knowledge. A cross-validation approach was adopted to build and evaluate linear models where the independent variable was either a traditional OO entity (classes, methods, associations, inheritance, or a combination of them) or an OOFP-related measure. Using the full OOFP process, the best size predictor achieved a normalized mean squared error of 38%. By removing Function Point weighting tables from the OOFP process and carefully analyzing collected data points and developer practices, we identified several factors that influence size estimation. Our empirical evidence demonstrates that, by controlling these factors, size estimates could be substantially improved, decreasing the normalized mean squared error to 15%, a 56% reduction in relative terms.
  125. [125]G. Antoniol, M. D. Penta, G. Masone, and U. Villano, “XOgastan: XML-Oriented gcc AST Analysis and Transformations,” in SCAM, 2003, pp. 173–182.
    Bibtex
      @inproceedings{01238043,
      author = {Antoniol, Giuliano and Penta, Massimiliano Di and Masone, Gianluca and Villano, Umberto},
      title = {XOgastan: XML-Oriented gcc AST Analysis and Transformations},
      booktitle = {SCAM},
      year = {2003},
      pages = {173-182},
      ee = {http://doi.ieeecomputersociety.org/10.1109/SCAM.2003.1238043},
      crossref = {DBLP:conf/scam/2003},
      bibsource = {DBLP, http://dblp.uni-trier.de},
      pdf = {2003/01238043.pdf}
    }
    
      
    Abstract Software maintenance, program analysis and transformation tools almost always rely on static source code analysis as the first and fundamental step to gather information. In the past, two different strategies have been adopted to develop tool suites. There are tools encompassing or implementing the source parse step, where the parser is internal to the toolkit, developed and maintained with it. A different approach builds tools on top of external, already-available components such as compilers that output the abstract syntax tree, or make it available via an API. We present an approach and a tool, XOgastan, developed exploiting the gcc/g++ ability to save a representation of the intermediate abstract syntax tree into a file. XOgastan translates the gcc/g++ format into a graph exchange language representation, thus taking advantage of the high number of currently available XML tools for the subsequent analysis phases. The tool is illustrated and its design is discussed, showing its architecture and the main implementation choices.
  126. [126]G. Antoniol et al., “Browsing Large Pedigrees to Study of the Isolated Populations in the ‘Parco Nazionale del Cilento e Vallo di Diano,’” in WIRN, 2003, pp. 258–268.
    Bibtex
      @inproceedings{conf/wirn/AntoniolCRLNCCCALTP03,
      author = {Antoniol, Giuliano and Ceccarelli, Michele and Rollo, Vincenzo Fabio and Longo, Wanda and Nutile, Teresa and Ciullo, Marina and Colonna, Enza and Calabria, Antonietta and Astore, Maria and Lembo, Anna and Toriello, Paola and Persico, M. Grazia},
      title = {Browsing Large Pedigrees to Study of the Isolated Populations in the "Parco Nazionale del Cilento e Vallo di Diano"},
      booktitle = {WIRN},
      year = {2003},
      pages = {258-268},
      ee = {http://dx.doi.org/10.1007/978-3-540-45216-4_29},
      crossref = {DBLP:conf/wirn/2003},
      bibsource = {DBLP, http://dblp.uni-trier.de}
    }
    
      
  127. [127]G. Antoniol, M. D. Penta, and M. Neteler, “Moving to Smaller Libraries via Clustering and Genetic Algorithms,” in CSMR, 2003, pp. 307–316.
    Bibtex
      @inproceedings{conf/csmr/AntoniolPN03,
      author = {Antoniol, Giuliano and Penta, Massimiliano Di and Neteler, Markus},
      title = {Moving to Smaller Libraries via Clustering and Genetic Algorithms},
      booktitle = {CSMR},
      year = {2003},
      pages = {307-316},
      ee = {http://dx.doi.org/10.1109/CSMR.2003.1192439, http://doi.ieeecomputersociety.org/10.1109/CSMR.2003.1192439},
      crossref = {DBLP:conf/csmr/2003},
      bibsource = {DBLP, http://dblp.uni-trier.de}
    }
    
      
  128. [128]G. Antoniol, M. D. Penta, and E. Merlo, “YAAB (Yet Another AST Browser): Using OCL to Navigate ASTs,” in IWPC, 2003, p. 13-.
    Bibtex
      @inproceedings{conf/iwpc/AntoniolPM03,
      author = {Antoniol, Giuliano and Penta, Massimiliano Di and Merlo, Ettore},
      title = {YAAB (Yet Another AST Browser): Using OCL to Navigate ASTs},
      booktitle = {IWPC},
      year = {2003},
      pages = {13-},
      ee = {http://computer.org/proceedings/iwpc/1883/18830013abs.htm},
      crossref = {DBLP:conf/iwpc/2003},
      bibsource = {DBLP, http://dblp.uni-trier.de}
    }
    
      
  129. [129]G. Antoniol, M. Ceccarelli, A. Maratea, and F. Russo, “Classification of Digital Terrain Models Through Fuzzy Clustering: An Application,” in WILF, 2003, pp. 174–182.
    Bibtex
      @inproceedings{conf/wilf/AntoniolCMR03,
      author = {Antoniol, Giuliano and Ceccarelli, Michele and Maratea, Antonio and Russo, F.},
      title = {Classification of Digital Terrain Models Through Fuzzy Clustering: An Application},
      booktitle = {WILF},
      year = {2003},
      pages = {174-182},
      ee = {http://dx.doi.org/10.1007/10983652_22},
      crossref = {DBLP:conf/wilf/2003},
      bibsource = {DBLP, http://dblp.uni-trier.de}
    }
    
      
  130. [130]P.-L. Brunelle, E. Merlo, and G. Antoniol, “Investigating Java Type Analyses for the Receiver-Classes Testing Criterion,” in ISSRE, 2003, pp. 419–429.
    Bibtex
      @inproceedings{01251063,
      author = {Brunelle, Pierre-Luc and Merlo, Ettore and Antoniol, Giuliano},
      title = {Investigating Java Type Analyses for the Receiver-Classes Testing Criterion},
      booktitle = {ISSRE},
      year = {2003},
      pages = {419-429},
      ee = {http://doi.ieeecomputersociety.org/10.1109/ISSRE.2003.1251063},
      crossref = {DBLP:conf/issre/2003},
      bibsource = {DBLP, http://dblp.uni-trier.de},
      pdf = {2003/01251063.pdf}
    }
    
      
    Abstract This paper investigates the precision of three linear-complexity type analyses for Java software: Class Hierarchy Analysis (CHA), Rapid Type Analysis (RTA) and Variable Type Analysis (VTA). Precision is measured relative to class targets. Class target results are useful in the context of the receiver-classes criterion, which is an object-oriented testing strategy that aims to exercise every possible class binding of the receiver object reference at each dynamic call site. In this context, using a more precise analysis decreases the number of infeasible bindings to cover, thus reducing the time spent on conceiving test data sets. This paper also introduces two novel variations to VTA, called the iteration and intersection variants. We present experimental results about the precision of CHA, RTA and VTA on a set of 17 Java programs, corresponding to a total of 600 kLOC of source code. Results show that, on average, RTA suggests 13% fewer bindings than CHA, standard VTA suggests 23% fewer bindings than CHA, and VTA with the two variations together suggests 32% fewer bindings than CHA.
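    A minimal sketch of Class Hierarchy Analysis for the receiver-classes criterion, assuming the hierarchy and the declared methods are given explicitly: the receiver classes of a call on a reference of declared type T are T and its subclasses whose (possibly inherited) lookup of the invoked method succeeds. RTA and VTA would further prune this set; the class names are invented.
      def subclasses(hierarchy, cls):
          """hierarchy: {class: parent or None}; returns cls plus all transitive subclasses."""
          out = {cls}
          changed = True
          while changed:
              changed = False
              for c, parent in hierarchy.items():
                  if parent in out and c not in out:
                      out.add(c)
                      changed = True
          return out

      def cha_receiver_classes(hierarchy, methods, declared_type, method):
          """Receiver classes per CHA: subtypes of the declared type whose
          (possibly inherited) lookup of `method` resolves to a definition."""
          def resolves(c):
              while c is not None:
                  if method in methods.get(c, ()):
                      return True
                  c = hierarchy[c]
              return False
          return {c for c in subclasses(hierarchy, declared_type) if resolves(c)}

      hierarchy = {"Shape": None, "Circle": "Shape", "Square": "Shape", "Label": "Shape"}
      methods = {"Shape": {"area"}, "Circle": {"area"}, "Square": {"area"}}   # Label inherits area
      print(sorted(cha_receiver_classes(hierarchy, methods, "Shape", "area")))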
  131. [131]M. D. Penta, S. Gradara, and G. Antoniol, “Traceability Recovery in RAD Software Systems,” in IWPC, 2002, pp. 207–218.
    Bibtex
      @inproceedings{PentaGA02,
      author = {Penta, Massimiliano Di and Gradara, Sara and Antoniol, Giuliano},
      title = {Traceability Recovery in RAD Software Systems},
      booktitle = {IWPC},
      year = {2002},
      pages = {207-218},
      ee = {http://computer.org/proceedings/iwpc/1495/14950207abs.htm},
      crossref = {DBLP:conf/iwpc/2002},
      bibsource = {DBLP, http://dblp.uni-trier.de}
    }
    
      
  132. [132]M. D. Penta, M. Neteler, G. Antoniol, and E. Merlo, “Knowledge-Based Library Re-Factoring for an Open Source Project,” in WCRE, 2002, pp. 319–328.
    Bibtex
      @inproceedings{conf/wcre/PentaNAM02,
      author = {Penta, Massimiliano Di and Neteler, Markus and Antoniol, Giuliano and Merlo, Ettore},
      title = {Knowledge-Based Library Re-Factoring for an Open Source Project},
      booktitle = {WCRE},
      year = {2002},
      pages = {319-328},
      ee = {http://computer.org/proceedings/wcre/1799/17990319abs.htm},
      crossref = {DBLP:conf/wcre/2002},
      bibsource = {DBLP, http://dblp.uni-trier.de}
    }
    
      
  133. [133]E. Merlo, M. Dagenais, P. Bachand, J. S. Sormani, S. Gradara, and G. Antoniol, “Investigating Large Software System Evolution: The Linux Kernel,” in COMPSAC, 2002, pp. 421–426.
    Bibtex
      @inproceedings{01045038,
      author = {Merlo, Ettore and Dagenais, Michel and Bachand, P. and Sormani, J. S. and Gradara, Sara and Antoniol, Giuliano},
      title = {Investigating Large Software System Evolution: The Linux Kernel},
      booktitle = {COMPSAC},
      year = {2002},
      pages = {421-426},
      ee = {http://doi.ieeecomputersociety.org/10.1109/CMPSAC.2002.1045038},
      crossref = {DBLP:conf/compsac/2002},
      bibsource = {DBLP, http://dblp.uni-trier.de},
      pdf = {2002/01045038.pdf}
    }
    
      
    Abstract Large, multi-platform, multi-million lines of code software systems evolve to cope with new platforms or to meet users' ever-changing needs. While there have been several studies focused on the similarity of code fragments or modules, few studies have addressed the need to monitor the overall system evolution. Meanwhile, the decision to evolve or to refactor a large software system needs to be supported by high-level information representing the system's overall picture, abstracting from unnecessary details. This paper proposes to extend the concept of similarity of code fragments to quantify similarities at the release/system level. Similarities are captured by four software metrics representative of the commonalities and differences within and among software artifacts. To show the feasibility of characterizing a large software system with the new metrics, 365 releases of the Linux kernel were analyzed. The metrics, the experimental results, and the lessons learned are presented in the paper.
  134. [134]G. Antoniol, G. Canfora, G. Casazza, A. D. Lucia, and E. Merlo, “Recovering Traceability Links between Code and Documentation,” IEEE Trans. Software Eng., vol. 28, no. 10, pp. 970–983, 2002.
    Bibtex
      @article{journals/tse/AntoniolCCLM02,
      author = {Antoniol, Giuliano and Canfora, Gerardo and Casazza, Gerardo and Lucia, Andrea De and Merlo, Ettore},
      title = {Recovering Traceability Links between Code and Documentation},
      journal = {IEEE Trans. Software Eng.},
      volume = {28},
      number = {10},
      year = {2002},
      pages = {970-983},
      ee = {http://doi.ieeecomputersociety.org/10.1109/TSE.2002.1041053},
      bibsource = {DBLP, http://dblp.uni-trier.de}
    }
    
      
    Abstract Software system documentation is almost always expressed informally in natural language and free text. Examples include requirement specifications, design documents, manual pages, system development journals, error logs, and related maintenance reports. We propose a method based on information retrieval to recover traceability links between source code and free text documents. A premise of our work is that programmers use meaningful names for program items, such as functions, variables, types, classes, and methods. We believe that the application-domain knowledge that programmers process when writing the code is often captured by the mnemonics for identifiers; therefore, the analysis of these mnemonics can help to associate high-level concepts with program concepts and vice versa. We apply both a probabilistic and a vector space information retrieval model in two case studies to trace C++ source code onto manual pages and Java code onto functional requirements. We compare the results of applying the two models, discuss the benefits and limitations, and describe directions for improvements.
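    A minimal sketch of the probabilistic flavour of the recovery process, not the paper's model: identifiers are split out of the code, each free-text document is treated as a smoothed unigram language model, and documents are ranked by the log-likelihood of the code's mnemonics. Tokenization, smoothing and the example texts are illustrative.
      import math
      import re
      from collections import Counter

      def tokens(text):
          """Lower-case word and identifier tokens (camelCase and digit aware)."""
          return [t.lower() for t in re.findall(r"[A-Z]?[a-z]+|[A-Z]+(?![a-z])|\d+", text)]

      def rank_documents(code_text, documents, alpha=1.0):
          """Score each document by the smoothed unigram log-likelihood of the
          identifiers mentioned in the code, and rank documents accordingly."""
          query = tokens(code_text)
          scores = {}
          for name, text in documents.items():
              tf = Counter(tokens(text))
              n, vocab = sum(tf.values()), len(tf) + 1
              scores[name] = sum(math.log((tf[w] + alpha) / (n + alpha * vocab)) for w in query)
          return sorted(scores.items(), key=lambda kv: kv[1], reverse=True)

      code = "class AccountManager { void closeAccount(AccountId id); void printBalance(); }"
      docs = {
          "accounts.txt": "The account manager lets a clerk open and close an account.",
          "printer.txt": "The print service formats statements and reports for the printer.",
      }
      for name, score in rank_documents(code, docs):
          print(name, round(score, 1))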
  135. [135]G. Antoniol, U. Villano, E. Merlo, and M. D. Penta, “Analyzing cloning evolution in the Linux kernel,” Information & Software Technology, vol. 44, no. 13, pp. 755–765, 2002.
    Bibtex
      @article{1s20S0950584902001234main,
      author = {Antoniol, Giuliano and Villano, Umberto and Merlo, Ettore and Penta, Massimiliano Di},
      title = {Analyzing cloning evolution in the Linux kernel},
      journal = {Information {\&} Software Technology},
      volume = {44},
      number = {13},
      year = {2002},
      pages = {755-765},
      ee = {http://dx.doi.org/10.1016/S0950-5849(02)00123-4},
      bibsource = {DBLP, http://dblp.uni-trier.de},
      pdf = {2002/1-s2.0-S0950584902001234-main.pdf}
    }
    
      
    Abstract Identifying code duplication in a large, multi-platform software system is a challenging problem. This is due to a variety of reasons, including the presence of high-level programming languages and structures interleaved with hardware-dependent low-level resources and assembler code, the use of GUI-based configuration scripts generating commands to compile the system, and the extremely high number of possible different configurations. This paper studies the extent and the evolution of code duplications in the Linux kernel. Linux is a large, multi-platform software system; it is based on the Open Source concept, so there are no obstacles to discussing its implementation. In addition, it is decidedly too large to be examined manually: the current Linux kernel release (2.4.18) is about three million LOCs.
  136. [136]G. Antoniol, L. C. Briand, M. D. Penta, and Y. Labiche, “A Case Study Using the Round-Trip Strategy for State-Based Class Testing,” in ISSRE, 2002, pp. 269–279.
    Bibtex
      @inproceedings{01173268,
      author = {Antoniol, Giuliano and Briand, Lionel C. and Penta, Massimiliano Di and Labiche, Yvan},
      title = {A Case Study Using the Round-Trip Strategy for State-Based Class Testing},
      booktitle = {ISSRE},
      year = {2002},
      pages = {269-279},
      ee = {http://doi.ieeecomputersociety.org/10.1109/ISSRE.2002.1173268},
      crossref = {DBLP:conf/issre/2002},
      bibsource = {DBLP, http://dblp.uni-trier.de},
      pdf = {2002/01173268.pdf}
    }
    
      
    Abstract A number of strategies have been proposed for state-based class testing. An important proposal made by Chow, subsequently adapted by Binder, consists in deriving test sequences covering all round-trip paths in a finite state machine (FSM). Based on a number of (rather strong) assumptions, and for traditional FSMs, it can be demonstrated that all operation and transfer errors in the implementation can be uncovered. Through experimentation, this paper investigates this strategy when used in the context of UML statecharts. Based on a set of mutation operators proposed for object-oriented code, we seed a significant number of faults in an implementation of a specific container class. We then investigate the effectiveness of four test teams at uncovering faults, based on the round-trip path strategy, and analyze the faults that seem to be difficult to detect. Our main conclusion is that the round-trip path strategy is reasonably effective at detecting faults (87% average, as opposed to 69% for size-equivalent random test cases), but that a significant number of faults can only exhibit a high detection probability by augmenting the round-trip strategy with a traditional black-box strategy such as category-partition testing. This increases the number of test cases to run, and therefore the cost of testing, and a cost-benefit analysis weighting the increase in testing effort against the likely gain in fault detection is necessary.
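    A minimal sketch of round-trip path derivation, assuming the statechart is flattened into a plain state machine: the machine is unfolded into a transition tree, a branch ends as soon as it reaches a state already present in the tree, and each root-to-leaf event sequence becomes one test path. The container machine below is invented.
      from collections import deque

      def round_trip_paths(fsm, start):
          """fsm: {state: [(event, next_state), ...]}. Unfold the machine into a
          transition tree breadth-first; a branch ends when it reaches a state
          already in the tree (or a state with no outgoing transitions), and each
          root-to-leaf event sequence is one round-trip test path."""
          paths, visited = [], {start}
          queue = deque([(start, [])])
          while queue:
              state, events = queue.popleft()
              outgoing = fsm.get(state, [])
              if events and not outgoing:
                  paths.append(events)
              for event, nxt in outgoing:
                  if nxt in visited:
                      paths.append(events + [event])       # leaf: state already visited
                  else:
                      visited.add(nxt)
                      queue.append((nxt, events + [event]))
          return paths

      container = {
          "Empty":   [("add", "Holding")],
          "Holding": [("add", "Holding"), ("remove", "Empty"), ("clear", "Empty")],
      }
      for path in round_trip_paths(container, "Empty"):
          print(" -> ".join(path))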
  137. [137]G. Antoniol, G. Casazza, G. A. D. Lucca, M. D. Penta, and F. Rago, “A Queue Theory-Based Approach to Staff Software Maintenance Centers,” in ICSM, 2001, pp. 510–519.
    Bibtex
      @inproceedings{conf/icsm/AntoniolCLPR01,
      author = {Antoniol, Giuliano and Casazza, Gerardo and Lucca, Giuseppe A. Di and Penta, Massimiliano Di and Rago, Francesco},
      title = {A Queue Theory-Based Approach to Staff Software Maintenance Centers},
      booktitle = {ICSM},
      year = {2001},
      pages = {510-519},
      ee = {http://computer.org/proceedings/icsm/1189/11890510abs.htm},
      bibsource = {DBLP, http://dblp.uni-trier.de}
    }
    
      
    Abstract The Internet and WEB pervasiveness are changing the landscape of several different areas, ranging from information gathering/managing and commerce to software development, maintenance and evolution. Software companies having a geographically distributed structure or geographically distributed customers are adopting information communication technologies to cooperate. Communicating and exchanging knowledge between different company branches and with customers creates de facto a virtual software factory. This paper proposes to adopt queue theory to deal with an economically relevant category of problems: the staffing, the process management, and the service-level evaluation of massive maintenance projects in a virtual software factory.
  138. [138]G. Antoniol, B. Caprile, A. Potrich, and P. Tonella, “Design-code traceability recovery: selecting the basic linkage properties,” Sci. Comput. Program., vol. 40, no. 2-3, pp. 213–234, 2001.
    Bibtex
      @article{journals/scp/AntoniolCPT01,
      author = {Antoniol, Giuliano and Caprile, Bruno and Potrich, Alessandra and Tonella, Paolo},
      title = {Design-code traceability recovery: selecting the basic linkage properties},
      journal = {Sci. Comput. Program.},
      volume = {40},
      number = {2-3},
      year = {2001},
      pages = {213-234},
      ee = {http://dx.doi.org/10.1016/S0167-6423(01)00016-8},
      bibsource = {DBLP, http://dblp.uni-trier.de}
    }
    
      
    Abstract Traceability ensures that software artifacts of subsequent phases of the development cycle are consistent. Few works have so far addressed the problem of automatically recovering traceability links between object oriented (OO) design and code entities. Such a recovery process is required whenever there is no explicit support for traceability in the development process. The recovered information can drive the evolution of the available design so that it corresponds to the code, thus providing a still useful and updated high-level view of the system. Automatic recovery of traceability links can be achieved by determining the similarity of paired elements from design and code. The choice of the properties involved in the similarity computation is crucial for the success of the recovery process. In fact, design and code objects are complex artifacts with several properties attached. The basic anchors of the recovered traceability links should be chosen as those properties (or property combinations) which are expected to be maintained during the transformation of design into code. In this paper, different categories of basic properties of design and code entities will be evaluated with respect to the contribution they give to traceability recovery. Several industrial software components will be employed as a benchmark on which the performances of the alternatives are measured.
  139. [139]M. D. Penta, G. Casazza, G. Antoniol, and E. Merlo, “Modeling Web Maintenance Centers through Queue Models,” in CSMR, 2001, pp. 131–138.
    Bibtex
      @inproceedings{conf/csmr/PentaCAM01,
      author = {Penta, Massimiliano Di and Casazza, Gerardo and Antoniol, Giuliano and Merlo, Ettore},
      title = {Modeling Web Maintenance Centers through Queue Models},
      booktitle = {CSMR},
      year = {2001},
      pages = {131-138},
      bibsource = {DBLP, http://dblp.uni-trier.de}
    }
    
      
    Abstract The Internet and WEB pervasiveness are changing the landscape of several different areas, ranging from information gathering/managing and commerce to software development, maintenance and evolution. Traditionally phone-centric services, such as ordering of goods, maintenance/repair intervention requests, and bug/defect reporting, are moving towards WEB-centric solutions. This paper proposes the adoption of queue theory to support the design, staffing, management, and assessment of WEB-centric service centers. Data from a mailing list archiving a mixture of corrective maintenance and information requests were used to mimic a service center. Queue theory was adopted to model the relation between the number of servants and the performance level. Empirical evidence revealed that, by adding an express lane and a dispatcher, service time variability is greatly reduced and more complex business rules may be implemented. Moreover, express lane customers experience a reduction of service time even in the presence of a significant percentage of requests erroneously routed by the dispatcher.
  140. [140]B. Malenfant, G. Antoniol, E. Merlo, and M. Dagenais, “Flow Analysis to Detect Blocked Statements,” in ICSM, 2001, p. 62-.
    Bibtex
      @inproceedings{conf/icsm/MalenfantAMD01,
      author = {Malenfant, Bruno and Antoniol, Giuliano and Merlo, Ettore and Dagenais, Michel},
      title = {Flow Analysis to Detect Blocked Statements},
      booktitle = {ICSM},
      year = {2001},
      pages = {62-},
      ee = {http://computer.org/proceedings/icsm/1189/11890062abs.htm},
      bibsource = {DBLP, http://dblp.uni-trier.de}
    }
    
      
    Abstract In the context of software quality assessment, the paper proposes two new kinds of data which can be extracted from source code. The first, definitely blocked statements, can never be executed because preceding code prevents the execution of the program. The other data, called possibly blocked statements, may be blocked by blocking code. The paper presents original flow equations to compute definitely and possibly blocked statements in source code. The experimental context is described and results are shown and discussed. Suggestions for further research are also presented.
  141. [141]P. Tonella and G. Antoniol, “Inference of object-oriented design patterns,” Journal of Software Maintenance, vol. 13, no. 5, pp. 309–330, 2001.
    Bibtex
      @article{journals/smr/TonellaA01,
      author = {Tonella, Paolo and Antoniol, Giuliano},
      title = {Inference of object-oriented design patterns},
      journal = {Journal of Software Maintenance},
      volume = {13},
      number = {5},
      year = {2001},
      pages = {309-330},
      ee = {http://dx.doi.org/10.1002/smr.235},
      bibsource = {DBLP, http://dblp.uni-trier.de}
    }
    
      
  142. [142]G. Antoniol, G. Casazza, M. D. Penta, and E. Merlo, “Modeling Clones Evolution through Time Series,” in ICSM, 2001, pp. 273–280.
    Bibtex
      @inproceedings{conf/icsm/AntoniolCPM01,
      author = {Antoniol, Giuliano and Casazza, Gerardo and Penta, Massimiliano Di and Merlo, Ettore},
      title = {Modeling Clones Evolution through Time Series},
      booktitle = {ICSM},
      year = {2001},
      pages = {273-280},
      ee = {http://computer.org/proceedings/icsm/1189/11890273abs.htm},
      bibsource = {DBLP, http://dblp.uni-trier.de}
    }
    
      
    Abstract The actual effort to evolve and maintain a software system is likely to vary depending on the amount of clones (i.e. duplicated or slightly different code fragments) present in the system. This paper presents a method for monitoring and predicting clone evolution across subsequent versions of a software system. Clones are first identified using a metric-based approach; then they are modeled in terms of time series, identifying predictive models. The proposed method has been validated with an experimental activity performed on 27 subsequent versions of mSQL, a medium-size software system written in C. The time span of the analyzed mSQL releases covers four years, from May 1995 (mSQL 1.0.6) to May 1999 (mSQL 2.0.10). For any given software release, the identified model was able to predict the clone percentage of the subsequent release with an average error below 4%. A higher prediction error was observed only in correspondence with major system redesigns.
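    A minimal sketch of the prediction step, with the models identified in the paper replaced by a first-order autoregressive fit for brevity: the clone percentage of the next release is predicted from the current one by least squares over the observed series. The clone percentages below are invented.
      def fit_ar1(series):
          """Least-squares fit of x[t+1] = a * x[t] + b over the observed releases."""
          xs, ys = series[:-1], series[1:]
          n = len(xs)
          mx, my = sum(xs) / n, sum(ys) / n
          cov = sum((x - mx) * (y - my) for x, y in zip(xs, ys))
          var = sum((x - mx) ** 2 for x in xs)
          a = cov / var
          return a, my - a * mx

      clone_percentages = [12.1, 12.4, 12.9, 13.1, 13.6, 13.9, 14.3]   # invented series
      a, b = fit_ar1(clone_percentages)
      print(f"predicted clone % for the next release: {a * clone_percentages[-1] + b:.2f}")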
  143. [143]G. Antoniol, G. Casazza, M. D. Penta, and R. Fiutem, “Object-oriented design patterns recovery,” Journal of Systems and Software, vol. 59, no. 2, pp. 181–196, 2001.
    Bibtex
      @article{journals/jss/AntoniolCPF01,
      author = {Antoniol, Giuliano and Casazza, Gerardo and Penta, Massimiliano Di and Fiutem, Roberto},
      title = {Object-oriented design patterns recovery},
      journal = {Journal of Systems and Software},
      volume = {59},
      number = {2},
      year = {2001},
      pages = {181-196},
      ee = {http://dx.doi.org/10.1016/S0164-1212(01)00061-9},
      bibsource = {DBLP, http://dblp.uni-trier.de}
    }
    
      
    Abstract Object-Oriented design patterns are an emergent technology: they are reusable micro-architectures, high-level building blocks. A system which has been designed using well-known, documented and accepted design patterns is also likely to exhibit good properties such as modularity, separation of concerns and maintainability. While for forward engineering the benefits of using design patterns are clear, using reverse engineering technologies to discover instances of patterns in a software artifact (e.g. design or code) may help in several key areas, among which are program understanding, design-to-code traceability and quality assessment. This paper describes a conservative approach and experimental results based on a multi-stage reduction strategy using OO software metrics and structural properties to extract structural design patterns from OO design or C++ code. To assess the effectiveness of the pattern recovery approach, a process and a portable tool suite written in Java, remotely accessible by means of any WEB browser, have been developed. The developed system and experimental results on 8 industrial software systems (design and code) and 200,000 lines of public domain C++ code are presented.
  144. [144]G. Antoniol, G. Casazza, G. A. D. Lucca, M. D. Penta, and E. Merlo, “Predicting Web Site Access: An Application of Time Series,” in WSE, 2001, pp. 57–61.
    Bibtex
      @inproceedings{00988786,
      author = {Antoniol, Giuliano and Casazza, Gerardo and Lucca, Giuseppe A. Di and Penta, Massimiliano Di and Merlo, Ettore},
      title = {Predicting Web Site Access: An Application of Time Series},
      booktitle = {WSE},
      year = {2001},
      pages = {57-61},
      ee = {http://doi.ieeecomputersociety.org/10.1109/WSE.2001.988786},
      crossref = {DBLP:conf/wse/2001},
      bibsource = {DBLP, http://dblp.uni-trier.de},
      pdf = {2001/00988786.pdf}
    }
    
      
    Abstract The Internet and Web pervasiveness are changing the landscape of several different areas ranging from information gathering/managing and commerce to software development. This paper presents a case study where time series were adopted to forecast future Web site access. In order to measure the applicability of time series to the prediction of Web site accesses, an experimental activity was performed. The log-access file of an academic Web site (http://www.ing.unisannio.it) was analyzed and its data used as test set. The analyzed Web site contains general information about the Faculty of Engineering of University of Sannio at Benevento (Italy). Preliminary results were encouraging: the average number of connections per week could be predicted with an acceptable error.
  145. [145]G. Antoniol, U. Villano, M. D. Penta, G. Casazza, and E. Merlo, “Identifying Clones in the Linux Kernel,” in SCAM, 2001, pp. 92–99.
    Bibtex
      @inproceedings{00972670,
      author = {Antoniol, Giuliano and Villano, Umberto and Penta, Massimiliano Di and Casazza, Gerardo and Merlo, Ettore},
      title = {Identifying Clones in the Linux Kernel},
      booktitle = {SCAM},
      year = {2001},
      pages = {92-99},
      ee = {http://doi.ieeecomputersociety.org/10.1109/SCAM.2001.10003},
      crossref = {DBLP:conf/scam/2001},
      bibsource = {DBLP, http://dblp.uni-trier.de},
      pdf = {2001/00972670.pdf}
    }
    
      
    Abstract Large multi-platform software systems are likely to encompass hardware-dependent code or sub-systems. However, analyzing multi-platform source code is challenging, due to the variety of supported configurations. Often, the system was originally developed for a single platform, and then new target platforms were added. This practice promotes the presence of duplicated code, also called "cloned" code. The paper presents the clone percentage of a multi-platform, multi-million lines of code system, the Linux kernel version 2.4.0, detected with a metric-based approach. After a brief description of the procedure followed for code analysis and clone identification, the obtained results are commented upon.
  146. [146]F. Calzolari, P. Tonella, and G. Antoniol, “Maintenance and testing effort modeled by linear and nonlinear dynamic systems,” Information & Software Technology, vol. 43, no. 8, pp. 477–486, 2001.
    Bibtex
      @article{1s20S0950584901001562main,
      author = {Calzolari, F. and Tonella, Paolo and Antoniol, Giuliano},
      title = {Maintenance and testing effort modeled by linear and nonlinear dynamic systems},
      journal = {Information {\&} Software Technology},
      volume = {43},
      number = {8},
      year = {2001},
      pages = {477-486},
      ee = {http://dx.doi.org/10.1016/S0950-5849(01)00156-2},
      bibsource = {DBLP, http://dblp.uni-trier.de},
      pdf = {2001/1s20S0950584901001562main.pdf}
    }
    
      
    Abstract Maintenance and testing activities (conducted, respectively, on the release currently in use and on the release to be delivered) absorb most of the total lifetime cost of software development. Such economic relevance suggests investigating the maintenance and testing processes to find models allowing software engineers to better estimate, plan and manage costs and activities. Ecological systems in which predators and prey compete for survival have been investigated by applying suitable mathematical models. An analogy can be drawn between biological prey and software defects, and between predators and programmers. In fact, when programmers start trying to recognize and correct code defects, while the number of residual defects decreases, the effort spent to find any new defect has an initial increase, followed by a decline when almost all defects are removed, similar to prey and predator populations. This paper proposes to describe the evolution of the maintenance and testing effort by means of the predator-prey dynamic model. The applicability of the model is supported by the experimental data about two real-world projects. The fit of the model when parameters are estimated on all available data is high, and accurate predictions can be obtained when an initial segment of the available data is used for parameter estimation.
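    A minimal worked version of the analogy, not the paper's calibrated model: residual defects play the prey and testing effort the predator in a Lotka-Volterra system integrated with a simple Euler step; every coefficient below is invented.
      def predator_prey(defects0, effort0, a, b, c, d, dt=0.01, steps=5000):
          """Euler integration of d(defects)/dt = a*defects - b*defects*effort and
          d(effort)/dt = c*defects*effort - d*effort."""
          x, y, trajectory = defects0, effort0, []
          for _ in range(steps):
              dx = a * x - b * x * y
              dy = c * x * y - d * y
              x, y = max(x + dx * dt, 0.0), max(y + dy * dt, 0.0)
              trajectory.append((x, y))
          return trajectory

      # effort grows while defects are abundant, then decays as defects become scarce
      trajectory = predator_prey(defects0=100.0, effort0=5.0, a=0.05, b=0.01, c=0.002, d=0.1)
      for step in range(0, 5000, 1000):
          defects, effort = trajectory[step]
          print(step, round(defects, 1), round(effort, 1))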
  147. [147]G. Antoniol, M. D. Penta, G. Casazza, and E. Merlo, “A Method to Re-Organize Legacy Systems via Concept Analysis,” in IWPC, 2001, pp. 281–292.
    Bibtex
      @inproceedings{conf/iwpc/AntoniolDCM01,
      author = {Antoniol, Giuliano and Penta, Massimiliano Di and Casazza, Gerardo and Merlo, Ettore},
      title = {A Method to Re-Organize Legacy Systems via Concept Analysis},
      booktitle = {IWPC},
      year = {2001},
      pages = {281-292},
      ee = {http://computer.org/proceedings/iwpc/1131/11310281abs.htm},
      crossref = {DBLP:conf/iwpc/2001},
      bibsource = {DBLP, http://dblp.uni-trier.de}
    }
    
      
  148. [148]G. Antoniol, G. Canfora, G. Casazza, and A. D. Lucia, “Maintaining traceability links during object-oriented software evolution,” Softw., Pract. Exper., vol. 31, no. 4, pp. 331–355, 2001.
    Bibtex
      @article{journals/spe/AntoniolCCL01,
      author = {Antoniol, Giuliano and Canfora, Gerardo and Casazza, Gerardo and Lucia, Andrea De},
      title = {Maintaining traceability links during object-oriented software evolution},
      journal = {Softw., Pract. Exper.},
      volume = {31},
      number = {4},
      year = {2001},
      pages = {331-355},
      ee = {http://dx.doi.org/10.1002/spe.374},
      bibsource = {DBLP, http://dblp.uni-trier.de}
    }
    
      
    Abstract This paper presents a method to build and maintain traceability links and properties of a set of OO software releases. The method recovers an “as is” design from C++ software releases, compares recovered designs at the class interface level, and helps the user to deal with inconsistencies by pointing out regions of code where differences are concentrated. The comparison step exploits edit distance and a maximum match algorithm. The method has been experimented with on two freely available C++ systems. Results, as well as examples of applications to the visualization of the traceability information and to the estimation of the size of changes during maintenance, are reported in the paper.
  149. [149]G. A. D. Lucca, M. D. Penta, G. Antoniol, and G. Casazza, “An Approach for Reverse Engineering of Web-Based Application,” in WCRE, 2001, pp. 231–240.
    Bibtex
      @inproceedings{conf/wcre/LuccaPAC01,
      author = {Lucca, Giuseppe A. Di and Penta, Massimiliano Di and Antoniol, Giuliano and Casazza, Gerardo},
      title = {An Approach for Reverse Engineering of Web-Based Application},
      booktitle = {WCRE},
      year = {2001},
      pages = {231-240},
      ee = {http://computer.org/proceedings/wcre/1303/13030231abs.htm},
      bibsource = {DBLP, http://dblp.uni-trier.de}
    }
    
      
  150. [150]G. Antoniol, G. Casazza, and A. Cimitile, “Traceability Recovery by Modeling Programmer Behavior,” in WCRE, 2000, pp. 240–247.
    Bibtex
      @inproceedings{conf/wcre/AntoniolCC00,
      author = {Antoniol, Giuliano and Casazza, Gerardo and Cimitile, Aniello},
      title = {Traceability Recovery by Modeling Programmer Behavior},
      booktitle = {WCRE},
      year = {2000},
      pages = {240-247},
      ee = {http://computer.org/proceedings/wcre/0881/08810240abs.htm},
      bibsource = {DBLP, http://dblp.uni-trier.de}
    }
    
      
  151. [151]G. Antoniol, G. Canfora, G. Casazza, and A. D. Lucia, “Identifying the Starting Impact Set of a Maintenance Request: A Case Study,” in CSMR, 2000, pp. 227–230.
    Bibtex
      @inproceedings{conf/csmr/AntoniolCCL00,
      author = {Antoniol, Giuliano and Canfora, Gerardo and Casazza, Gerardo and Lucia, Andrea De},
      title = {Identifying the Starting Impact Set of a Maintenance Request: A Case Study},
      booktitle = {CSMR},
      year = {2000},
      pages = {227-230},
      ee = {http://www.computer.org/proceedings/csmr/0546/05460227abs.htm},
      bibsource = {DBLP, http://dblp.uni-trier.de}
    }
    
      
  152. [152]P. Tonella, G. Antoniol, R. Fiutem, and F. Calzolari, “Reverse engineering 4.7 million lines of code,” Softw., Pract. Exper., vol. 30, no. 2, pp. 129–150, 2000.
    Bibtex
      @article{journals/spe/TonellaAFC00,
      author = {Tonella, Paolo and Antoniol, Giuliano and Fiutem, Roberto and Calzolari, F.},
      title = {Reverse engineering 4.7 million lines of code},
      journal = {Softw., Pract. Exper.},
      volume = {30},
      number = {2},
      year = {2000},
      pages = {129-150},
      ee = {http://dx.doi.org/10.1002/(SICI)1097-024X(200002)30:2$<$129::AID-SPE293$>$3.0.CO;2-M},
      bibsource = {DBLP, http://dblp.uni-trier.de}
    }
    
      
    Abstract The ITC-Irst Reverse Engineering group was charged with analyzing a software application of approximately 4.7 million lines of C code. It was an old legacy system, maintained for a long time, on which several successive adaptive and corrective maintenance interventions had led to the degradation of the original structure. The company decided to re-engineer the software instead of replacing it, because the complexity and costs of re-implementing the application from scratch could not be afforded, and the associated risk could not be run. Several problems were encountered during re-engineering, including identifying dependencies and detecting redundant functions that were not used anymore. To accomplish these goals, we adopted a conservative approach. Before performing any kind of analysis on the whole code, we carefully evaluated the expected costs. To this aim, a small but representative sample of modules was preliminarily analyzed, and the costs and outcomes were extrapolated so as to obtain some indications on the analysis of the whole system. When the results of the sample modules were found to be useful as well as affordable for the entire system, the resources involved were carefully distributed among the different reverse engineering tasks to meet the customer’s deadline. This paper summarizes that experience, discussing how we approached the problem, the way we managed the limited resources available to complete the task within the assigned deadlines, and the lessons we learned.
  153. [153]G. Antoniol, G. Casazza, and E. Merlo, “Identification of Lower-Level Artifacts,” in IWPC, 2000, p. 253.
    Bibtex
      @inproceedings{conf/iwpc/AntoniolCM00,
      author = {Antoniol, Giuliano and Casazza, Gerardo and Merlo, Ettore},
      title = {Identification of Lower-Level Artifacts},
      booktitle = {IWPC},
      year = {2000},
      pages = {253},
      ee = {http://computer.org/proceedings/iwpc/0656/06560253abs.htm},
      crossref = {DBLP:conf/iwpc/2000},
      bibsource = {DBLP, http://dblp.uni-trier.de}
    }
    
      
  154. [154]G. Antoniol, G. Canfora, A. D. Lucia, G. Casazza, and E. Merlo, “Tracing Object-Oriented Code into Functional Requirements,” in IWPC, 2000, pp. 79–86.
    Bibtex
      @inproceedings{conf/iwpc/AntoniolCLCM00,
      author = {Antoniol, Giuliano and Canfora, Gerardo and Lucia, Andrea De and Casazza, Gerardo and Merlo, Ettore},
      title = {Tracing Object-Oriented Code into Functional Requirements},
      booktitle = {IWPC},
      year = {2000},
      pages = {79-86},
      ee = {http://computer.org/proceedings/iwpc/0656/06560079abs.htm},
      crossref = {DBLP:conf/iwpc/2000},
      bibsource = {DBLP, http://dblp.uni-trier.de}
    }
    
      
  155. [155]G. Antoniol, G. Canfora, G. Casazza, and A. D. Lucia, “Information Retrieval Models for Recovering Traceability Links between Code and Documentation,” in ICSM, 2000, p. 40-.
    Bibtex
      @inproceedings{conf/icsm/AntoniolCCL00,
      author = {Antoniol, Giuliano and Canfora, Gerardo and Casazza, Gerardo and Lucia, Andrea De},
      title = {Information Retrieval Models for Recovering Traceability Links between Code and Documentation},
      booktitle = {ICSM},
      year = {2000},
      pages = {40-},
      ee = {http://computer.org/proceedings/icsm/0753/07530040abs.htm},
      bibsource = {DBLP, http://dblp.uni-trier.de}
    }
    
      
    Abstract The research described in this paper is concerned with the application of information retrieval to software maintenance and in particular to the problem of recovering traceability links between the source code of a system and its free text documentation. We introduce a method based on the general idea of vector space information retrieval and apply it in two case studies to trace C++ source code onto manual pages and Java code onto functional requirements. The case studies discussed in this paper replicate the studies presented in previous works where a probabilistic information retrieval model was applied. We compare the results of vector space and probabilistic models and formulate hypotheses to explain the differences.
  156. [156]G. Antoniol, B. Caprile, A. Potrich, and P. Tonella, “Design-code traceability for object-oriented systems,” Ann. Software Eng., vol. 9, pp. 35–58, 2000.
    Bibtex
      @article{art3A1010232FA3A1018916522804,
      author = {Antoniol, Giuliano and Caprile, Bruno and Potrich, Alessandra and Tonella, Paolo},
      title = {Design-code traceability for object-oriented systems},
      journal = {Ann. Software Eng.},
      volume = {9},
      year = {2000},
      pages = {35-58},
      ee = {http://dx.doi.org/10.1023/A:1018916522804},
      bibsource = {DBLP, http://dblp.uni-trier.de},
      pdf = {2000/art3A1010232FA3A1018916522804.pdf}
    }
    
      
    Abstract Traceability is a key issue to ensure consistency among software artifacts of subsequent phases of the development cycle. However, few works have so far addressed the theme of tracing object oriented (OO) design into its implementation and evolving it. This paper presents an approach to checking the compliance of OO design with respect to source code and support its evolution. The process works on design artifacts expressed in the OMT (Object Modeling Technique) notation and accepts C++ source code. It recovers an “as is” design from the code, compares the recovered design with the actual design and helps the user to deal with inconsistencies. The recovery process exploits the edit distance computation and the maximum match algorithm to determine traceability links between design and code. The output is a similarity measure associated to design‐code class pairs, which can be classified as matched and unmatched by means of a maximum likelihood threshold. A graphic display of the design with different green levels associated to different levels of match and red for the unmatched classes is provided as a support to update the design and improve its traceability to the code.
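    The name-matching step can be illustrated with a small, hedged sketch: Levenshtein edit distance turned into a similarity score and thresholded to classify design/code class pairs. The class names, the fixed threshold, and the greedy best-match loop (a stand-in for the paper's maximum match algorithm and maximum likelihood threshold) are illustrative only.

      # Sketch: edit distance as a similarity score between design and code class
      # names, then thresholded to label pairs as matched or unmatched.
      def edit_distance(a, b):
          prev = list(range(len(b) + 1))
          for i, ca in enumerate(a, 1):
              cur = [i]
              for j, cb in enumerate(b, 1):
                  cur.append(min(prev[j] + 1,                 # deletion
                                 cur[j - 1] + 1,              # insertion
                                 prev[j - 1] + (ca != cb)))   # substitution
              prev = cur
          return prev[-1]

      def similarity(a, b):
          return 1.0 - edit_distance(a.lower(), b.lower()) / max(len(a), len(b), 1)

      design_classes = ["ImageViewer", "FileChooser", "PrintManager"]
      code_classes = ["ImgViewer", "FileChooserDlg", "Logger"]
      THRESHOLD = 0.6   # illustrative cut-off; the paper derives one by maximum likelihood

      for d in design_classes:
          best = max(code_classes, key=lambda c: similarity(d, c))
          status = "matched" if similarity(d, best) >= THRESHOLD else "unmatched"
          print(f"{d:13s} -> {best:15s} ({status}, sim={similarity(d, best):.2f})")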
  157. [157]G. Antoniol, G. Casazza, A. Cimitile, and M. Tortorella, “An Approach to Limit the Wynot Problem,” in ICSM, 2000, pp. 207–215.
    Bibtex
      @inproceedings{conf/icsm/AntoniolCCT00,
      author = {Antoniol, Giuliano and Casazza, Gerardo and Cimitile, Aniello and Tortorella, Maria},
      title = {An Approach to Limit the Wynot Problem},
      booktitle = {ICSM},
      year = {2000},
      pages = {207-215},
      ee = {http://computer.org/proceedings/icsm/0753/07530207abs.htm},
      bibsource = {DBLP, http://dblp.uni-trier.de}
    }
    
      
    Abstract Software evolution in a cooperative environment, where a pool of maintainers/developers contribute to the overall system changes, is challenging due to several factors, such as poor communication among individuals and the high number of produced changes. Conflicting or contradictory changes, as well as unforeseen or unexpected dependencies, may result in a non-working system. We propose a strategy aimed at reducing the risk of conflicting changes in a cooperative maintenance environment. To evaluate the feasibility of our approach, and to estimate the size of the code to be scrutinized per single changed line, we developed a number of tools and tested our approach on 30 releases of the DDD software system. The preliminary results are encouraging: the potentially impacted LOCs per single changed LOC are on average fewer than 4.
  158. [158]P. Tonella, G. Antoniol, R. Fiutem, and E. Merlo, “Variable-precision reaching definitions analysis,” Journal of Software Maintenance, vol. 11, no. 2, pp. 117–142, 1999.
    Bibtex
      @article{journals/smr/TonellaAFM99,
      author = {Tonella, Paolo and Antoniol, Giuliano and Fiutem, Roberto and Merlo, Ettore},
      title = {Variable-precision reaching definitions analysis},
      journal = {Journal of Software Maintenance},
      volume = {11},
      number = {2},
      year = {1999},
      pages = {117-142},
      ee = {http://dx.doi.org/10.1002/(SICI)1096-908X(199903/04)11:2<117::AID-SMR185>3.0.CO;2-P},
      bibsource = {DBLP, http://dblp.uni-trier.de}
    }
    
      
  159. [159]E. Merlo and G. Antoniol, “A static measure of a subset of intra-procedural data flow testing coverage based on node coverage,” in CASCON, 1999, p. 7.
    Bibtex
      @inproceedings{p7-merlo,
      author = {Merlo, Ettore and Antoniol, Giuliano},
      title = {A static measure of a subset of intra-procedural data flow testing coverage based on node coverage},
      booktitle = {CASCON},
      year = {1999},
      pages = {7},
      ee = {http://doi.acm.org/10.1145/781995.782002},
      crossref = {DBLP:conf/cascon/1999},
      bibsource = {DBLP, http://dblp.uni-trier.de},
      pdf = {1999/p7-merlo.pdf}
    }
    
      
    Abstract In past years, a number of research works, mostly based on pre- and post-dominator analysis, have been presented about finding subsets of nodes and edges (called unrestricted subsets) such that their traversal during execution (if feasible) exercises, respectively, all feasible nodes and edges in a Control Flow Graph. This paper presents an approach to statically measure a subset of intra-procedural data flow (all uses) coverage obtained by exercising an unrestricted subset of nodes during testing. This measure indicates the possible degree of data flow testing obtainable while using a weaker test coverage criterion. The approach has been implemented in C++ on a PC under Linux, and results obtained from the analysis of the GNU find tool, which is about 16 KLOC of C-language source code, are presented together with discussions and conclusions.
  160. [160]R. Fiutem, G. Antoniol, P. Tonella, and E. Merlo, “ART: an architectural reverse engineering environment,” Journal of Software Maintenance, vol. 11, no. 5, pp. 339–364, 1999.
    Bibtex
      @article{journals/smr/FiutemATM99,
      author = {Fiutem, Roberto and Antoniol, Giuliano and Tonella, Paolo and Merlo, Ettore},
      title = {ART: an architectural reverse engineering environment},
      journal = {Journal of Software Maintenance},
      volume = {11},
      number = {5},
      year = {1999},
      pages = {339-364},
      ee = {http://dx.doi.org/10.1002/(SICI)1096-908X(199909/10)11:5<339::AID-SMR196>3.0.CO;2-I},
      bibsource = {DBLP, http://dblp.uni-trier.de}
    }
    
      
    Abstract When programmers perform maintenance tasks, program understanding is often required. One of the first activities in understanding a software system is identifying its subsystems and their relations, i.e., its software architecture. Since a large part of the effort is spent in creating a mental model of the system under study, tools can help maintainers in managing the evolution of legacy systems by showing them architectural information. This paper describes an environment for the architectural recovery of software systems called Architectural Recovery Tool (ART). The environment is based on a hierarchical architectural model that drives the application of a set of recognizers, each producing a different architectural view of a system or of some of its parts. Recognizers embody knowledge about architectural cliches and use flow analysis techniques to make their output more accurate. To test the accuracy and effectiveness of ART, a suite of public domain applications containing interesting architectural organizations was selected as a benchmark. Results are presented by showing ART performance in terms of precision and recall of the architectural concept retrieval process. The results obtained show that cliche-based architectural recovery is feasible and that the recovered information can be a valuable support in reengineering and maintenance activities.
  161. [161]P. Tonella and G. Antoniol, “Object-Oriented Design Pattern Inference,” in ICSM, 1999, p. 230-.
    Bibtex
      @inproceedings{conf/icsm/TonellaA99,
      author = {Tonella, Paolo and Antoniol, Giuliano},
      title = {Object-Oriented Design Pattern Inference},
      booktitle = {ICSM},
      year = {1999},
      pages = {230-},
      ee = {http://computer.org/proceedings/icsm/0016/00160230abs.htm},
      bibsource = {DBLP, http://dblp.uni-trier.de}
    }
    
      
    Abstract When designing a new application, experienced software engineers usually try to employ solutions that proved successful in previous projects. Such reuse of code organizations is seldom made explicit. Nevertheless, it represents important information about the system that can be extremely valuable in the maintenance phase by documenting the design choices underlying the implementation. In addition, when it is available, it can be reused whenever a similar problem is encountered. In this paper an approach is proposed to the inference of recurrent design patterns directly from the code or the design. No assumption is made on the availability of any pattern library, and the concept analysis algorithm, adapted for this purpose, is able to infer the presence of class groups which instantiate a common repeated pattern. In fact, concept analysis provides sets of objects sharing attributes, which, in the case of object-oriented design patterns, become class members or inter-class relations. The approach was applied to a C++ application for which the structural relations among classes led to the extraction of a set of structural design patterns, which could be enriched with non-structural information about class members and method invocations. The resulting patterns could be interpreted as meaningful organizations aimed at solving general problems which have several instances in the analyzed application.
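    A minimal sketch of the concept-analysis step, assuming a binary table of classes versus structural relations has already been extracted; the classes, the relation labels, and the restriction to object concepts are illustrative simplifications of the algorithm adapted in the paper.

      # Sketch of the concept-analysis step: from a binary table of classes versus
      # structural relations, compute the object concepts, i.e. maximal groups of
      # classes sharing a maximal set of relations (candidate repeated patterns).
      context = {   # hypothetical classes and the structural relations they exhibit
          "Button":    {"inherits:Widget", "delegates-to:Handler"},
          "Menu":      {"inherits:Widget", "delegates-to:Handler"},
          "Canvas":    {"inherits:Widget", "aggregates:Shape"},
          "Composite": {"inherits:Shape", "aggregates:Shape"},
      }

      def extent(attrs):    # classes exhibiting all the given relations
          return {c for c, a in context.items() if attrs <= a}

      def intent(classes):  # relations shared by all the given classes
          sets = [context[c] for c in classes]
          return set.intersection(*sets) if sets else set()

      concepts = set()
      for c in context:
          closure = intent(extent(context[c]))   # closure of this class's relation set
          concepts.add((frozenset(extent(closure)), frozenset(closure)))

      for ext, shared in concepts:
          if len(ext) > 1 and shared:            # repeated structure: pattern candidate
              print(sorted(ext), "share", sorted(shared))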
  162. [162]G. Antoniol, C. J. Lokan, G. Caldiera, and R. Fiutem, “A Function Point-Like Measure for Object-Oriented Software,” Empirical Software Engineering, vol. 4, no. 3, pp. 263–287, 1999.
    Bibtex
      @article{journals/ese/AntoniolLCF99,
      author = {Antoniol, Giuliano and Lokan, Christopher J. and Caldiera, Gianluigi and Fiutem, Roberto},
      title = {A Function Point-Like Measure for Object-Oriented Software},
      journal = {Empirical Software Engineering},
      volume = {4},
      number = {3},
      year = {1999},
      pages = {263-287},
      bibsource = {DBLP, http://dblp.uni-trier.de}
    }
    
      
    Abstract We present a method for estimating the size, and consequently effort and duration, of object oriented software development projects. Different estimates may be made in different phases of the development process, according to the available information. We define an adaptation of traditional function points, called Object Oriented Function Points, to enable the measurement of object oriented analysis and design specifications. Tools have been constructed to automate the counting method. The novel aspect of our method is its flexibility. An organisation can experiment with different counting policies, to find the most accurate predictors of size, effort, etc. in its environment. The method and preliminary results of its application in an industrial environment are presented and discussed.
  163. [163]G. Antoniol, A. Potrich, P. Tonella, and R. Fiutem, “Evolving Object Oriented Design to Improve Code Traceability,” in IWPC, 1999, p. 151-.
    Bibtex
      @inproceedings{conf/iwpc/AntoniolPTF99,
      author = {Antoniol, Giuliano and Potrich, Alessandra and Tonella, Paolo and Fiutem, Roberto},
      title = {Evolving Object Oriented Design to Improve Code Traceability},
      booktitle = {IWPC},
      year = {1999},
      pages = {151-},
      ee = {http://computer.org/proceedings/iwpc/0179/01790151abs.htm},
      crossref = {DBLP:conf/iwpc/1999},
      bibsource = {DBLP, http://dblp.uni-trier.de}
    }
    
      
  164. [164]S. Lapierre, E. Merlo, G. Savard, G. Antoniol, R. Fiutem, and P. Tonella, “Automatic Unit Test Data Generation Using Mixed-Integer Linear Programming and Execution Trees,” in ICSM, 1999, pp. 189–198.
    Bibtex
      @inproceedings{conf/icsm/LapierreMSAFT99,
      author = {Lapierre, S{\'e}bastien and Merlo, Ettore and Savard, Gilles and Antoniol, Giuliano and Fiutem, Roberto and Tonella, Paolo},
      title = {Automatic Unit Test Data Generation Using Mixed-Integer Linear Programming and Execution Trees},
      booktitle = {ICSM},
      year = {1999},
      pages = {189-198},
      ee = {http://computer.org/proceedings/icsm/0016/00160189abs.htm},
      bibsource = {DBLP, http://dblp.uni-trier.de}
    }
    
      
    Abstract This paper presents an approach to automatic unit test data generation for branch coverage using mixed-integer linear programming, execution trees, and symbolic execution. This approach can be useful to both general testing and regression testing after software maintenance and reengineering activities. Several strategies, including original algorithms to move towards practical test data generation, have been investigated in this paper. Methods include: the analysis of minimum path-length partial execution trees for unconstrained arcs, thus increasing the generation performance and reducing the difficulties originated by infeasible paths; the reduction of the difficulties originated by non-linear path conditions, by considering alternative linear paths; the reduction of the number of test cases needed to achieve the desired coverage, based on the concept of unconstrained arcs in a control flow graph; and the extension of symbolic execution to deal with dynamic memory allocation and deallocation, pointers, and pointers to functions. Execution trees are symbolically executed to produce Extended Path Constraints, which are then partially mapped by an original algorithm into linear problems whose solutions correspond to the test data to be used as input to cover program branches. Partially mapping this problem into a linear optimization problem avoids infeasible and non-linear path problems if a feasible linear alternate path exists in the same execution tree. The presented approach has been implemented in C++ and tested on C-language programs on a Pentium/Linux system. Preliminary results are encouraging and show that a high percentage of the program branches can be covered by the test data automatically produced. The approach is flexible with respect to branch selection criteria coming from general testing as well as regression testing.
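    The mapping of a linearized path condition onto a linear program can be sketched as follows; the toy constraint, the variable names, and the use of SciPy's linprog solver (an assumption for the sketch, not the paper's C++ implementation) are illustrative only.

      # Sketch: a linearized path condition solved as a linear program; any feasible
      # point is a candidate test input for the branch guarded by that condition.
      from scipy.optimize import linprog

      # Hypothetical path condition for one branch:  x + y <= 10,  x >= 3,  y >= 1
      # linprog expects A_ub @ v <= b_ub with v = [x, y]; ">=" rows are negated.
      A_ub = [[ 1,  1],    # x + y <= 10
              [-1,  0],    # -x <= -3   (x >= 3)
              [ 0, -1]]    # -y <= -1   (y >= 1)
      b_ub = [10, -3, -1]

      # Minimizing x + y is only a way to pick one concrete feasible point.
      res = linprog(c=[1, 1], A_ub=A_ub, b_ub=b_ub, bounds=[(None, None)] * 2)
      if res.success:
          x, y = res.x
          print(f"candidate test input covering the branch: x={x:.0f}, y={y:.0f}")
      else:
          print("path condition infeasible; try an alternative path to the branch")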
  165. [165]G. Antoniol, F. Calzolari, and P. Tonella, “Impact of Function Pointers on the Call Graph,” in CSMR, 1999, pp. 51–61.
    Bibtex
      @inproceedings{00756682,
      author = {Antoniol, Giuliano and Calzolari, F. and Tonella, Paolo},
      title = {Impact of Function Pointers on the Call Graph},
      booktitle = {CSMR},
      year = {1999},
      pages = {51-61},
      ee = {http://dx.doi.org/10.1109/CSMR.1999.756682, http://doi.ieeecomputersociety.org/10.1109/CSMR.1999.756682},
      crossref = {DBLP:conf/csmr/1999},
      bibsource = {DBLP, http://dblp.uni-trier.de},
      pdf = {1999/00756682.pdf}
    }
    
      
    Abstract Maintenance activities are made more difficult when pointers are heavily used in source code: the programmer needs to build a mental model of memory locations and of the way they are accessed by means of pointers in order to comprehend the functionalities of the system. Although several points-to analysis algorithms have been proposed in the literature to provide information about memory locations referenced by pointers, there are no quantitative evaluations of the impact of pointers on the overall program understanding activities. Program comprehension activities are usually supported by tools providing suitable views of the source program. One of the most widely used code views is the Call Graph, a graph representing calls between functions in the given program. Unfortunately, when pointers, and especially function pointers, are heavily used in the code, the extracted call graph is highly inaccurate, and thus of little use, if a points-to analysis is not preliminarily performed. In this paper we address the problem of evaluating the impact of pointer analysis on the Call Graph. The results obtained on a set of real world programs provide a quantitative evaluation and show the key role of pointer analysis in Call Graph construction.
  166. [166]G. Antoniol, G. Canfora, A. D. Lucia, and E. Merlo, “Recovering Code to Documentation Links in OO Systems,” in WCRE, 1999, pp. 136–144.
    Bibtex
      @inproceedings{conf/wcre/AntoniolCLM99,
      author = {Antoniol, Giuliano and Canfora, Gerardo and Lucia, Andrea De and Merlo, Ettore},
      title = {Recovering Code to Documentation Links in OO Systems},
      booktitle = {WCRE},
      year = {1999},
      pages = {136-144},
      ee = {http://computer.org/proceedings/wcre/0303/03030136abs.htm},
      bibsource = {DBLP, http://dblp.uni-trier.de}
    }
    
      
    Abstract Software system documentation is almost always expressed informally in natural language and free text. Examples include requirement specifications, design documents, manual pages, system development journals, error logs, and related maintenance reports. We propose an approach to establish and maintain traceability links between the source code and free text documents. A premise of our work is that programmers use meaningful names for program items such as functions, variables, types, classes, and methods. We believe that the application-domain knowledge that programmers process when writing the code is often captured by the mnemonics for identifiers; therefore the analysis of these mnemonics can help to associate high-level concepts with program concepts and vice versa. In this paper the approach is applied to software written in an object-oriented language, namely C++, to trace classes to manual sections.
  167. [167]R. Fiutem, P. Tonella, G. Antoniol, and E. Merlo, “Points-to analysis for program understanding,” Journal of Systems and Software, vol. 44, no. 3, pp. 213–227, 1999.
    Bibtex
      @article{journals/jss/FiutemTAM99,
      author = {Fiutem, Roberto and Tonella, Paolo and Antoniol, Giuliano and Merlo, Ettore},
      title = {Points-to analysis for program understanding},
      journal = {Journal of Systems and Software},
      volume = {44},
      number = {3},
      year = {1999},
      pages = {213-227},
      ee = {http://dx.doi.org/10.1016/S0164-1212(98)10058-4},
      bibsource = {DBLP, http://dblp.uni-trier.de}
    }
    
      
    Abstract Program understanding activities are more difficult for programs written in languages (such as C) that heavily make use of pointers for data structure manipulation, because the programmer needs to build a mental model of the memory use and of the pointers to its locations. Pointers also pose additional problems to the tools supporting program understanding, since they introduce additional dependences that have to be accounted for. This paper extends the flow insensitive, context insensitive points-to analysis algorithm proposed by Steensgaard to cover arbitrary combinations of pointer dereferences, array subscripts, and field selections. It exhibits interesting properties, among which scalability, resulting from its low complexity and good performance. The results of the analysis are valuable by themselves, as their graphical display represents the points-to links between locations. They are also integrated with other program understanding techniques, such as call graph construction, slicing, plan recognition, and architectural recovery. The use of this algorithm in the framework of the program understanding environment CANTO is discussed.
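    A hedged sketch of the flow- and context-insensitive (Steensgaard-style) idea: every assignment unifies the abstract locations of its two sides through a union-find structure. The toy statements and the simplified location naming are illustrative, not the extended algorithm described in the paper.

      # Sketch of a Steensgaard-style, flow- and context-insensitive analysis:
      # each assignment unifies the abstract locations of its two sides, so the
      # whole program is processed in near-linear time with a union-find structure.
      parent = {}

      def find(x):
          parent.setdefault(x, x)
          while parent[x] != x:
              parent[x] = parent[parent[x]]   # path compression
              x = parent[x]
          return x

      def union(a, b):
          ra, rb = find(a), find(b)
          if ra != rb:
              parent[ra] = rb

      points_to = {}                 # representative pointer -> abstract target location

      def assign_addr(p, x):         # statement: p = &x
          t = points_to.setdefault(find(p), find(x))
          union(t, find(x))          # if p already had a target, merge the two

      def assign_copy(p, q):         # statement: p = q  (their targets are unified)
          tp = points_to.setdefault(find(p), f"loc({p})")
          tq = points_to.setdefault(find(q), f"loc({q})")
          union(tp, tq)

      # Toy program:  p = &x;  q = &y;  p = q;
      assign_addr("p", "x")
      assign_addr("q", "y")
      assign_copy("p", "q")
      print(find("x") == find("y"))  # True: x and y end up in the same location set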
  168. [168]G. Antoniol, G. Canfora, and A. D. Lucia, “Maintaining Traceability During Object-Oriented Software Evolution: A Case Study,” in ICSM, 1999, pp. 211–219.
    Bibtex
      @inproceedings{conf/icsm/AntoniolCL99,
      author = {Antoniol, Giuliano and Canfora, Gerardo and Lucia, Andrea De},
      title = {Maintaining Traceability During Object-Oriented Software Evolution: A Case Study},
      booktitle = {ICSM},
      year = {1999},
      pages = {211-219},
      ee = {http://computer.org/proceedings/icsm/0016/00160211abs.htm},
      bibsource = {DBLP, http://dblp.uni-trier.de}
    }
    
      
  169. [169]G. Antoniol, G. Canfora, and A. D. Lucia, “Estimating the Size of Changes for Evolving Object Oriented Systems: A Case Study,” in IEEE METRICS, 1999, p. 250-.
    Bibtex
      @inproceedings{00809746,
      author = {Antoniol, Giuliano and Canfora, Gerardo and Lucia, Andrea De},
      title = {Estimating the Size of Changes for Evolving Object Oriented Systems: A Case Study},
      booktitle = {IEEE METRICS},
      year = {1999},
      pages = {250-},
      ee = {http://doi.ieeecomputersociety.org/10.1109/METRIC.1999.809746},
      crossref = {DBLP:conf/metrics/1999},
      bibsource = {DBLP, http://dblp.uni-trier.de},
      pdf = {1999/00809746.pdf}
    }
    
      
    Abstract Size-related measures have traditionally been the basis for effort estimation models to predict costs of software activities along the entire software product life cycle. Object-Oriented (OO) systems are developed and evolve by adding/removing new classes and modifying existing entities. We propose an approach to predict the size of changes of evolving OO systems based on the analysis of the classes impacted by a change request. Our approach can be used both in iterative development processes and during software maintenance. A first empirical evaluation of the proposed approach has been obtained by applying our tools to the post-release evolution of OO software systems available on the net. The systems were analyzed, and models to predict added/modified LOCs from added/modified classes were statistically validated. In the paper, preliminary results of the above-outlined evaluation are presented.
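    A minimal sketch of the kind of size-of-change model discussed above: an ordinary least-squares fit relating the number of classes touched by a change request to the added/modified LOC. The history data points and the single-predictor form are made up for illustration; the paper statistically validates its models on real post-release data.

      # Sketch: ordinary least squares relating the number of classes touched by a
      # change request to the added/modified LOC (the history pairs are made up).
      history = [(2, 180), (5, 430), (3, 260), (8, 700), (1, 90), (6, 540)]

      n = len(history)
      sx = sum(c for c, _ in history)
      sy = sum(l for _, l in history)
      sxx = sum(c * c for c, _ in history)
      sxy = sum(c * l for c, l in history)
      slope = (n * sxy - sx * sy) / (n * sxx - sx * sx)
      intercept = (sy - slope * sx) / n

      impacted_classes = 4           # hypothetical change request
      print(f"model: LOC = {slope:.1f} * classes + {intercept:.1f}")
      print(f"predicted size of change: {slope * impacted_classes + intercept:.0f} LOC")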
  170. [170]G. Antoniol, F. Calzolari, L. Cristoforetti, R. Fiutem, and G. Caldiera, “Adapting Function Points to Object-Oriented Information Systems,” in CAiSE, 1998, pp. 59–76.
    Bibtex
      @inproceedings{conf/caise/AntoniolCCFC98,
      author = {Antoniol, Giuliano and Calzolari, F. and Cristoforetti, L. and Fiutem, Roberto and Caldiera, Gianluigi},
      title = {Adapting Function Points to Object-Oriented Information Systems},
      booktitle = {CAiSE},
      year = {1998},
      pages = {59-76},
      ee = {http://dx.doi.org/10.1007/BFb0054219},
      crossref = {DBLP:conf/caise/1998},
      bibsource = {DBLP, http://dblp.uni-trier.de}
    }
    
      
  171. [171]R. Fiutem and G. Antoniol, “Identifying Design-Code Inconsistencies in Object-Oriented Software: A Case Study,” in ICSM, 1998, p. 94-.
    Bibtex
      @inproceedings{conf/icsm/FiutemA98,
      author = {Fiutem, Roberto and Antoniol, Giuliano},
      title = {Identifying Design-Code Inconsistencies in Object-Oriented Software: A Case Study},
      booktitle = {ICSM},
      year = {1998},
      pages = {94-},
      ee = {http://computer.org/proceedings/icsm/8779/87790094abs.htm},
      bibsource = {DBLP, http://dblp.uni-trier.de}
    }
    
      
    Abstract Traceability is a key issue to ensure consistency among software artifacts of subsequent phases of the development cycle. However, few works have addressed the theme of tracing object-oriented design into its implementation. This paper presents an approach to check the compliance of OO design with respect to source code. The process works on design artefacts expressed in OMT notation and accepts C++ source code. It recovers an “as is” design from the code, compares the recovered design with the actual design, and helps the user deal with inconsistencies by pointing out regions of code which do not match the design. The recovery process exploits regular expressions and edit distance to bridge the gap between code and design. Results, as well as considerations related to presentation issues, are reported in the paper.
  172. [172]F. Calzolari, P. Tonella, and G. Antoniol, “Modeling Maintenance Effort by Means of Dynamic Systems,” in CSMR, 1998, pp. 150–156.
    Bibtex
      @inproceedings{conf/csmr/CalzolariTA98,
      author = {Calzolari, F. and Tonella, Paolo and Antoniol, Giuliano},
      title = {Modeling Maintenance Effort by Means of Dynamic Systems},
      booktitle = {CSMR},
      year = {1998},
      pages = {150-156},
      ee = {http://dx.doi.org/10.1109/CSMR.1998.665787, http://doi.ieeecomputersociety.org/10.1109/CSMR.1998.665787},
      crossref = {DBLP:conf/csmr/1998},
      bibsource = {DBLP, http://dblp.uni-trier.de}
    }
    
      
    Abstract The dynamic evolution of ecological systems in which predators and prey compete for survival has been investigated by applying suitable mathematical models. Dynamic systems theory provides a useful way to model interspecies competition and thus the evolution of predator and prey populations. This kind of mathematical framework has been shown to be well suited to describe the evolution of economic systems as well, where instead of predators and prey there are consumers and resources. This paper suggests how dynamic systems could be usefully applied to the maintenance context, namely to model the dynamic evolution of maintenance effort. When maintainers start trying to recognize and correct code defects, while the number of residual defects decreases, the effort spent to find any new defect shows an initial increase followed by a decline, in a similar way as prey and predator populations do. The feasibility of this approach is supported by the experimental data about a 67-month maintenance task of a software project and its successive releases.
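    The predator/prey analogy can be sketched with a small Lotka-Volterra-style system in which residual defects play the prey and maintenance effort the predator; the coefficients and initial values below are illustrative and not calibrated on the 67-month data set mentioned above.

      # Sketch of the predator/prey analogy: residual defects play the prey and
      # maintenance effort the predator; integrated with a simple Euler step.
      a, b, c = 0.02, 0.004, 0.15    # defect removal rate, effort growth, effort decay
      defects, effort = 120.0, 5.0   # initial residual defects and effort level
      dt = 0.1

      for step in range(601):
          d_defects = -a * effort * defects              # defects fixed as effort is spent
          d_effort = b * effort * defects - c * effort   # effort grows while defects abound
          defects += d_defects * dt
          effort += d_effort * dt
          if step % 100 == 0:
              print(f"t={step * dt:5.1f}  defects={defects:6.1f}  effort={effort:5.1f}")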
  173. [173]G. Caldiera, G. Antoniol, R. Fiutem, and C. J. Lokan, “Definition and Experimental Evaluation of Function Points for Object-Oriented Systems,” in IEEE METRICS, 1998, p. 167-.
    Bibtex
      @inproceedings{conf/metrics/CaldieraAFL98,
      author = {Caldiera, Gianluigi and Antoniol, Giuliano and Fiutem, Roberto and Lokan, Christopher J.},
      title = {Definition and Experimental Evaluation of Function Points for Object-Oriented Systems},
      booktitle = {IEEE METRICS},
      year = {1998},
      pages = {167-},
      ee = {http://doi.ieeecomputersociety.org/10.1109/METRIC.1998.731242},
      crossref = {DBLP:conf/metrics/1998},
      bibsource = {DBLP, http://dblp.uni-trier.de}
    }
    
      
    Abstract We present a method for estimating the size, and consequently effort and duration, of object oriented software development projects. Different estimates may be made in different phases of the development process, according to the available information. We define an adaptation of traditional function points, called Object Oriented Function Points, to enable the measurement of object oriented analysis and design specifications. Tools have been constructed to automate the counting method. The novel aspect of our method is its flexibility. An organisation can experiment with different counting policies, to find the most accurate predictors of size, effort, etc. in its environment. The method and preliminary results of its application in an industrial environment are presented and discussed.
  174. [174]G. Antoniol, R. Fiutem, and L. Cristoforetti, “Using Metrics to Identify Design Patterns in Object-Oriented Software,” in IEEE METRICS, 1998, p. 23-.
    Bibtex
      @inproceedings{00731224,
      author = {Antoniol, Giuliano and Fiutem, Roberto and Cristoforetti, L.},
      title = {Using Metrics to Identify Design Patterns in Object-Oriented Software},
      booktitle = {IEEE METRICS},
      year = {1998},
      pages = {23-},
      ee = {http://doi.ieeecomputersociety.org/10.1109/METRIC.1998.731224},
      crossref = {DBLP:conf/metrics/1998},
      bibsource = {DBLP, http://dblp.uni-trier.de},
      pdf = {1998/00731224.pdf}
    }
    
      
    Abstract Object-Oriented design patterns are an emergent technology: they are reusable micro-architectures, high-level building blocks. This paper presents a conservative approach based on a multi-stage reduction strategy using OO software metrics and structural properties to extract structural design patterns from OO design or code. Code and design are mapped into an intermediate representation, called Abstract Object Language, to maintain independence from the programming language and the adopted CASE tools. To assess the effectiveness of the pattern recovery process, a portable environment written in Java, remotely accessible by means of any Web browser, has been developed. Based on this environment, experimental results obtained on public-domain and industrial software are discussed in the paper.
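    A hedged sketch of the multi-stage reduction idea: cheap class-level metrics prune the candidate space before any expensive structural matching is attempted. The metrics, thresholds, and class data are illustrative, not the constraint sets used by the tool.

      # Sketch of the multi-stage reduction idea: class-level metrics prune the
      # candidate space before a structural match would be attempted.
      classes = {
          "Composite": {"aggregates_parent_type": True,  "overridden_methods": 3},
          "Leaf":      {"aggregates_parent_type": False, "overridden_methods": 2},
          "Component": {"aggregates_parent_type": False, "overridden_methods": 0},
          "Logger":    {"aggregates_parent_type": False, "overridden_methods": 0},
      }

      # Stage 1: metric constraints a Composite-like participant must satisfy.
      candidates = [name for name, m in classes.items()
                    if m["aggregates_parent_type"] and m["overridden_methods"] >= 1]

      # Stage 2 (not shown): the structural match against the full class-relation
      # graph would run only on the survivors of stage 1.
      print("candidates after metric filtering:", candidates)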
  175. [175]F. Calzolari, P. Tonella, and G. Antoniol, “Dynamic Model for Maintenance and Testing Effort,” in ICSM, 1998, pp. 104–112.
    Bibtex
      @inproceedings{conf/icsm/CalzolariTA98,
      author = {Calzolari, F. and Tonella, Paolo and Antoniol, Giuliano},
      title = {Dynamic Model for Maintenance and Testing Effort},
      booktitle = {ICSM},
      year = {1998},
      pages = {104-112},
      ee = {http://computer.org/proceedings/icsm/8779/87790104abs.htm},
      bibsource = {DBLP, http://dblp.uni-trier.de}
    }
    
      
    Abstract The dynamic evolution of ecological systems in which predators and prey compete for survival has been investigated by applying suitable mathematical models. Dynamic systems theory provides a useful way to model interspecies competition and thus the evolution of predator and prey populations. This kind of mathematical framework has been shown to be well suited to describe the evolution of economic systems as well, where instead of predators and prey there are consumers and resources. Maintenance and testing activities absorb the most relevant part of the total life-cycle cost of software. Such economic relevance strongly suggests investigating the maintenance and testing processes in order to find new models allowing software engineers to better estimate, plan, and manage costs and activities. In this paper we show how dynamic systems theory could be usefully applied to the maintenance and testing context, namely to model the dynamic evolution of the effort. When programmers start trying to recognize and correct code defects, while the number of residual defects decreases, the effort spent to find any new defect shows an initial increase followed by a decline, in a similar way as prey and predator populations do.
  176. [176]G. Antoniol, R. Fiutem, and L. Cristoforetti, “Design Pattern Recovery in Object-Oriented Software,” in IWPC, 1998, p. 153-.
    Bibtex
      @inproceedings{conf/iwpc/AntoniolFC98,
      author = {Antoniol, Giuliano and Fiutem, Roberto and Cristoforetti, L.},
      title = {Design Pattern Recovery in Object-Oriented Software},
      booktitle = {IWPC},
      year = {1998},
      pages = {153-},
      ee = {http://dlib2.computer.org/conferen/iwpc/8560/pdf/85600153.pdf},
      crossref = {DBLP:conf/iwpc/1998},
      bibsource = {DBLP, http://dblp.uni-trier.de}
    }
    
      
  177. [177]P. Tonella, G. Antoniol, R. Fiutem, and E. Merlo, “Points-to Analysis for Program Understanding,” in WPC, 1997, p. 90-.
    Bibtex
      @inproceedings{conf/iwpc/TonellaAFM97,
      author = {Tonella, Paolo and Antoniol, Giuliano and Fiutem, Roberto and Merlo, Ettore},
      title = {Points-to Analysis for Program Understanding},
      booktitle = {WPC},
      year = {1997},
      pages = {90-},
      ee = {http://computer.org/proceedings/wpc/7993/79930090abs.htm},
      crossref = {DBLP:conf/iwpc/1997},
      bibsource = {DBLP, http://dblp.uni-trier.de}
    }
    
      
    Abstract Program understanding activities are more difficult for programs written in languages (such as C) that heavily make use of pointers for data structure manipulation, because the programmer needs to build a mental model of the memory use and of the pointers to its locations. Pointers also pose additional problems to the tools supporting program understanding, since they introduce additional dependences that have to be accounted for. This paper extends the flow insensitive, context insensitive points-to analysis algorithm proposed by Steensgaard to cover arbitrary combinations of pointer dereferences, array subscripts, and field selections. It exhibits interesting properties, among which scalability, resulting from its low complexity and good performance. The results of the analysis are valuable by themselves, as their graphical display represents the points-to links between locations. They are also integrated with other program understanding techniques, such as call graph construction, slicing, plan recognition, and architectural recovery. The use of this algorithm in the framework of the program understanding environment CANTO is discussed.
  178. [178]P. Tonella, G. Antoniol, R. Fiutem, and E. Merlo, “Variable Precision Reaching Definitions Analysis for Software Maintenance,” in CSMR, 1997, pp. 60–67.
    Bibtex
      @inproceedings{conf/csmr/TonellaAFM97,
      author = {Tonella, Paolo and Antoniol, Giuliano and Fiutem, Roberto and Merlo, Ettore},
      title = {Variable Precision Reaching Definitions Analysis for Software Maintenance},
      booktitle = {CSMR},
      year = {1997},
      pages = {60-67},
      ee = {http://dx.doi.org/10.1109/CSMR.1997.583007, http://doi.ieeecomputersociety.org/10.1109/CSMR.1997.583007},
      crossref = {DBLP:conf/csmr/1997},
      bibsource = {DBLP, http://dblp.uni-trier.de}
    }
    
      
    Abstract A flow analyzer can be very helpful in the process of program understanding by providing the programmer with different views of the code. As the documentation is often incomplete or inconsistent, it is extremely useful to extract the information a programmer may need directly from the code. Program understanding activities are interactive; thus, program analysis tools may be asked for quick answers by the maintainer. Therefore, the control over the trade-off between accuracy and efficiency should be given to the user. This paper presents an approach to interprocedural reaching definitions flow analysis based on three levels of precision, depending on the sensitivity to the calling context and the control flow. A lower precision degree produces an overestimate of the data dependences in a program. The result is anyhow conservative (all dependences which hold are surely reported) and definitely faster than the more accurate counterparts. A tool supporting reaching definitions analysis in the three variants has been developed. The results on a test suite show that three orders of magnitude can be gained in execution times by the less accurate analysis, but 57.4% extra dependences are on average added. The intermediate variant is much more precise (1.6% extra dependences) but gains less in time (one order of magnitude).
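    The underlying data-flow iteration can be sketched on a tiny control flow graph; the sketch below computes intra-procedural reaching definitions to a fixpoint and ignores the inter-procedural, variable-precision aspect that is the paper's contribution. Node names and definitions are illustrative.

      # Sketch: intra-procedural reaching definitions computed to a fixpoint on a
      # tiny control flow graph. Each definition is a (variable, defining node) pair.
      succ = {"entry": ["n1"], "n1": ["n2", "n3"], "n2": ["n4"], "n3": ["n4"], "n4": []}
      gen = {"entry": set(), "n1": {("x", "n1")}, "n2": {("y", "n2")},
             "n3": {("x", "n3")}, "n4": set()}
      killed_vars = {"entry": set(), "n1": {"x"}, "n2": {"y"}, "n3": {"x"}, "n4": set()}

      preds = {n: [] for n in succ}
      for n, ss in succ.items():
          for s in ss:
              preds[s].append(n)

      IN = {n: set() for n in succ}
      OUT = {n: set() for n in succ}
      changed = True
      while changed:                               # iterate until nothing changes
          changed = False
          for n in succ:
              IN[n] = set().union(*(OUT[p] for p in preds[n]))
              new_out = gen[n] | {d for d in IN[n] if d[0] not in killed_vars[n]}
              if new_out != OUT[n]:
                  OUT[n], changed = new_out, True

      print("definitions reaching n4:", sorted(OUT["n4"]))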
  179. [179]G. Antoniol, R. Fiutem, G. Lutteri, P. Tonella, S. Zanfei, and E. Merlo, “Program Understanding and Maintenance with the CANTO Environment,” in ICSM, 1997, p. 72-.
    Bibtex
      @inproceedings{05726937,
      author = {Antoniol, Giuliano and Fiutem, Roberto and Lutteri, G. and Tonella, Paolo and Zanfei, S. and Merlo, Ettore},
      title = {Program Understanding and Maintenance with the CANTO Environment},
      booktitle = {ICSM},
      year = {1997},
      pages = {72-},
      ee = {http://doi.ieeecomputersociety.org/10.1109/ICSM.1997.624233},
      crossref = {DBLP:conf/icsm/1997},
      bibsource = {DBLP, http://dblp.uni-trier.de},
      pdf = {1997/05726937.pdf}
    }
    
      
    Abstract During maintenance activities, the availability of integrated conceptual views that present software at different levels of abstraction, from software architecture to control and data flow relations at the code level, is fundamental to understand and modify legacy systems. This paper presents CANTO, a comprehensive program understanding and maintenance environment which integrates fine-grained information with architectural views extracted from source code, giving the user control over what is being computed by the analyses. The capabilities and usefulness of CANTO are illustrated with reference to a real understanding and maintenance task.
  180. [180]P. Tonella, G. Antoniol, R. Fiutem, and E. Merlo, “Flow Insensitive C++ Pointers and Polymorphism Analysis and its Application to Slicing,” in ICSE, 1997, pp. 433–443.
    Bibtex
      @inproceedings{p433-tonella,
      author = {Tonella, Paolo and Antoniol, Giuliano and Fiutem, Roberto and Merlo, Ettore},
      title = {Flow Insensitive C++ Pointers and Polymorphism Analysis and its Application to Slicing},
      booktitle = {ICSE},
      year = {1997},
      pages = {433-443},
      ee = {http://doi.acm.org/10.1145/253228.253371},
      crossref = {DBLP:conf/icse/1997},
      bibsource = {DBLP, http://dblp.uni-trier.de},
      pdf = {1997/p433-tonella.pdf}
    }
    
      
    Abstract Large software systems are difficult to understand and maintain. Code analysis tools can provide programmers with different views of the software, which may help their understanding activity. To be applicable to real programs written in modern programming languages, these tools need to efficiently handle pointers. In the case of C++ analysis, object-oriented peculiarities (e.g., polymorphism) have to be accounted for as well. We propose a flow insensitive, context insensitive points-to analysis capable of dealing with the features of object-oriented code. It is extremely promising because of the positive trade-off between complexity and accuracy. The integration of the points-to results with other analyses, such as reaching definitions and slicing, is also discussed in the context of our program understanding environment.
  181. [181]P. Tonella, R. Fiutem, G. Antoniol, and E. Merlo, “Augmenting Pattern-Based Architectural Recovery with Flow Analysis: Mosaic -A Case Study,” in WCRE, 1996, pp. 198–207.
    Bibtex
      @inproceedings{conf/wcre/TonellaFAM96,
      author = {Tonella, Paolo and Fiutem, Roberto and Antoniol, Giuliano and Merlo, Ettore},
      title = {Augmenting Pattern-Based Architectural Recovery with Flow Analysis: Mosaic -A Case Study},
      booktitle = {WCRE},
      year = {1996},
      pages = {198-207},
      ee = {http://computer.org/proceedings/wcre/7674/76740198abs.htm},
      bibsource = {DBLP, http://dblp.uni-trier.de}
    }
    
      
  182. [182]R. Fiutem, P. Tonella, G. Antoniol, and E. Merlo, “A Cliche’-Based Environment to Support Architectural Reverse Engineering,” in ICSM, 1996, pp. 319–328.
    Bibtex
      @inproceedings{conf/icsm/FiutemTAM96,
      author = {Fiutem, Roberto and Tonella, Paolo and Antoniol, Giuliano and Merlo, Ettore},
      title = {A Cliche'-Based Environment to Support Architectural Reverse Engineering},
      booktitle = {ICSM},
      year = {1996},
      pages = {319-328},
      ee = {http://computer.org/proceedings/icsm/7677/76770319abs.htm},
      crossref = {DBLP:conf/icsm/1996},
      bibsource = {DBLP, http://dblp.uni-trier.de}
    }
    
      
  183. [183]R. Fiutem, E. Merlo, G. Antoniol, and P. Tonella, “Understanding the architecture of software systems,” in WPC, 1996, p. 187-.
    Bibtex
      @inproceedings{conf/iwpc/FiutemMAT96,
      author = {Fiutem, Roberto and Merlo, Ettore and Antoniol, Giuliano and Tonella, Paolo},
      title = {Understanding the architecture of software systems},
      booktitle = {WPC},
      year = {1996},
      pages = {187-},
      ee = {http://computer.org/proceedings/wpc/7283/72830187abs.htm},
      crossref = {DBLP:conf/iwpc/1996},
      bibsource = {DBLP, http://dblp.uni-trier.de}
    }
    
      
  184. [184]R. Fiutem, P. Tonella, G. Antoniol, and E. Merlo, “A Cliche-Based Environment to Support Architectural Reverse Engineering,” in WCRE, 1996, pp. 277–286.
    Bibtex
      @inproceedings{conf/wcre/FiutemTAM96,
      author = {Fiutem, Roberto and Tonella, Paolo and Antoniol, Giuliano and Merlo, Ettore},
      title = {A Cliche-Based Environment to Support Architectural Reverse Engineering},
      booktitle = {WCRE},
      year = {1996},
      pages = {277-286},
      ee = {http://computer.org/proceedings/wcre/7674/76740277abs.htm},
      bibsource = {DBLP, http://dblp.uni-trier.de}
    }
    
      
  185. [185]G. Antoniol, R. Fiutem, E. Merlo, and P. Tonella, “Application and user interface migration from BASIC to Visual C++,” in ICSM, 1995, p. 76-.
    Bibtex
      @inproceedings{conf/icsm/AntoniolFMT95,
      author = {Antoniol, Giuliano and Fiutem, Roberto and Merlo, Ettore and Tonella, Paolo},
      title = {Application and user interface migration from BASIC to Visual C++},
      booktitle = {ICSM},
      year = {1995},
      pages = {76-},
      ee = {http://computer.org/proceedings/icsm/7141/71410076abs.htm},
      crossref = {DBLP:conf/icsm/1995},
      bibsource = {DBLP, http://dblp.uni-trier.de}
    }
    
      
    Abstract In this paper an approach to reengineer BASIC PC legacy code into modern graphical systems is proposed. BASIC has historically been one of the first languages available on PCs. Based on it, small and medium-size companies have over time developed systems that represent valuable company assets to be preserved. Our goal is the automatic migration from the BASIC character-oriented user interface to a graphical environment which includes a GUI builder and compiles event-driven C/C++ code. For this purpose, a conceptual representation in terms of abstract graphical objects and callbacks was inferred from the original code, and a translator from BASIC to C was developed. Moreover, the GUI builder internal representation was generated so that the user interface can be interactively fine-tuned by the programmer. We present and discuss BASIC peculiarities with preliminary results on code translation. To explain our approach to user interface migration, an example is used throughout the text.
  186. [186]M. Federico, M. Cettolo, F. Brugnara, and G. Antoniol, “Language modelling for efficient beam-search,” Computer Speech & Language, vol. 9, no. 4, pp. 353–379, 1995.
    Bibtex
      @article{journals/csl/FedericoCBA95,
      author = {Federico, Marcello and Cettolo, Mauro and Brugnara, Fabio and Antoniol, Giuliano},
      title = {Language modelling for efficient beam-search},
      journal = {Computer Speech {\&} Language},
      volume = {9},
      number = {4},
      year = {1995},
      pages = {353-379},
      ee = {http://dx.doi.org/10.1006/csla.1995.0017},
      bibsource = {DBLP, http://dblp.uni-trier.de}
    }
    
      
    Abstract This paper considers the problem of estimating bigram language models and of efficiently representing them by a finite state network which can be employed by an HMM based beam-search continuous speech recognizer. A review of the best known bigram estimation techniques is given, together with a description of the original Stacked model. LM comparisons in terms of perplexity are given for three text corpora with different data sparseness conditions, while speech recognition accuracy tests are presented for a 10,000-word, real-time, speaker independent dictation task. The Stacked estimation method favorably compares with the best ones by achieving about 93% word accuracy. If better language model estimates can improve recognition accuracy, representations better suited to the search algorithm can improve its speed as well. Two static representations of language models are introduced: linear and tree-based. Results show that the latter organization is better exploited by the beam-search algorithm, as it provides a 5 times faster response with the same accuracy. Finally, an off-line reduction algorithm is presented that cuts the space requirements of the tree-based topology to about 40%. The solutions proposed here were successfully employed in a real-time, speaker independent, 10,000-word dictation system for radiological reporting.
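    A hedged sketch of an interpolated bigram model and the perplexity measure used above to compare estimators; the tiny training text and the fixed interpolation weight are illustrative and unrelated to the Stacked estimation method.

      # Sketch: an interpolated bigram model and its perplexity on a held-out
      # sentence (toy corpus; not the Stacked estimator).
      import math
      from collections import Counter

      train = "the report shows the lung the report is normal".split()
      unigrams = Counter(train)
      bigrams = Counter(zip(train, train[1:]))
      V = len(unigrams)
      LAMBDA = 0.7                                  # weight of the bigram component

      def prob(w, prev):
          p_uni = (unigrams[w] + 1) / (len(train) + V)                # add-one unigram
          p_bi = bigrams[(prev, w)] / unigrams[prev] if unigrams[prev] else 0.0
          return LAMBDA * p_bi + (1 - LAMBDA) * p_uni

      test = "the report is normal".split()
      log_prob = sum(math.log2(prob(w, prev)) for prev, w in zip(test, test[1:]))
      perplexity = 2 ** (-log_prob / (len(test) - 1))
      print(f"perplexity on the test sentence: {perplexity:.2f}")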
  187. [187]G. Antoniol, F. Brugnara, M. Cettolo, and M. Federico, “Language model estimations and representations for real-time continuous speech recognition,” 1994.
    Bibtex
      @inproceedings{conf/interspeech/AntoniolBCF94,
      author = {Antoniol, Giuliano and Brugnara, Fabio and Cettolo, Mauro and Federico, Marcello},
      title = {Language model estimations and representations for real-time continuous speech recognition},
      booktitle = {ICSLP},
      year = {1994},
      ee = {http://www.isca-speech.org/archive/icslp_1994/i94_0859.html},
      crossref = {DBLP:conf/interspeech/1994},
      bibsource = {DBLP, http://dblp.uni-trier.de}
    }
    
      
    Abstract This paper compares different ways of estimating bigram language models and of representing them in a finite state network used by a beam-search based, continuous speech, and speaker independent HMM recognizer. Attention is focused on the n-gram interpolation scheme, for which seven models are considered. Among them, the Stacked estimated linear interpolated model favourably compares with the best known ones. Further, two different static representations of the search space are investigated: “linear” and “tree-based”. Results show that the latter topology is better suited to the beam-search algorithm. Moreover, this representation can be reduced by a network optimization technique, which allows the dynamic size of the recognition process to be decreased by 60%. Extensive recognition experiments on a 10,000-word dictation task with four speakers are described, in which an average word accuracy of 93% is achieved with real-time response.
  188. [188]B. Angelini et al., “Radiological reporting by speech recognition: the a.re.s. system,” 1994.
    Bibtex
      @inproceedings{conf/interspeech/AngeliniABCFFL94,
      author = {Angelini, Bianca and Antoniol, Giuliano and Brugnara, Fabio and Cettolo, Mauro and Federico, Marcello and Fiutem, Roberto and Lazzari, Gianni},
      title = {Radiological reporting by speech recognition: the a.re.s. system},
      booktitle = {ICSLP},
      year = {1994},
      ee = {http://www.isca-speech.org/archive/icslp_1994/i94_1267.html},
      crossref = {DBLP:conf/interspeech/1994},
      bibsource = {DBLP, http://dblp.uni-trier.de}
    }
    
      
  189. [189]G. Antoniol, M. Cettolo, and M. Federico, “Techniques for robust recognition in restricted domains,” 1993.
    Bibtex
      @inproceedings{conf/interspeech/AntoniolCF93,
      author = {Antoniol, Giuliano and Cettolo, Mauro and Federico, Marcello},
      title = {Techniques for robust recognition in restricted domains},
      booktitle = {EUROSPEECH},
      year = {1993},
      ee = {http://www.isca-speech.org/archive/eurospeech_1993/e93_2219.html},
      crossref = {DBLP:conf/interspeech/1993},
      bibsource = {DBLP, http://dblp.uni-trier.de}
    }
    
      
    Abstract This paper describes an Automatic Speech Understanding (ASU) system used in a human-robot interface for the remote control of a mobile robot. The intended application is that of an operator issuing telecontrol commands to one or more robots from a remote workstation. ASU is supposed to be performed with spontaneous continuous speech and quasi real time conditions. Training and testing of the system was based on speech data collected by means of Wizard of Oz simulations. Two kinds of robustness factors are introduced: the first is a recognition error-tolerant approach to semantic interpretation, the second is based on a technique for evaluating the reliability of the ASU system output with respect to the input utterance. Preliminary results are 90.9% of correct semantic interpretations, and 89.1% of correct detection of out-of-domain sentences at the cost of rejecting 16.4% of correct in-domain sentences.
  190. [190]G. Antoniol, R. Fiutem, R. Flor, and G. Lazzari, “Radiological Reporting Based on Voice Recognition,” in EWHCI, 1993, pp. 242–253.
    Bibtex
      @inproceedings{conf/ewhci/AntoniolFFL93,
      author = {Antoniol, Giuliano and Fiutem, Roberto and Flor, R. and Lazzari, Gianni},
      title = {Radiological Reporting Based on Voice Recognition},
      booktitle = {EWHCI},
      year = {1993},
      pages = {242-253},
      ee = {http://dx.doi.org/10.1007/3-540-57433-6_53},
      crossref = {DBLP:conf/ewhci/1993},
      bibsource = {DBLP, http://dblp.uni-trier.de}
    }
    
      
  191. [191]G. Antoniol, F. Brugnara, and D. Giuliani, “Admissible strategies for acoustic matching with a large vocabulary,” 1991.
    Bibtex
      @inproceedings{conf/interspeech/AntoniolBG91,
      author = {Antoniol, Giuliano and Brugnara, Fabio and Giuliani, Diego},
      title = {Admissible strategies for acoustic matching with a large vocabulary},
      booktitle = {EUROSPEECH},
      year = {1991},
      ee = {http://www.isca-speech.org/archive/eurospeech_1991/e91_0589.html},
      crossref = {DBLP:conf/interspeech/1991},
      bibsource = {DBLP, http://dblp.uni-trier.de}
    }
    
      
  192. [192]G. Antoniol, F. Brugnara, F. D. Palma, G. Lazzari, and E. Moser, “A.Re.S.: an interface for automatic reporting by speech,” 1991.
    Bibtex
      @inproceedings{conf/interspeech/AntoniolBPLM91,
      author = {Antoniol, Giuliano and Brugnara, Fabio and Palma, F. Dalla and Lazzari, Gianni and Moser, E.},
      title = {A. RE. s. : an interface for automatic reporting by speech},
      booktitle = {EUROSPEECH},
      year = {1991},
      ee = {http://www.isca-speech.org/archive/eurospeech_1991/e91_0973.html},
      crossref = {DBLP:conf/interspeech/1991},
      bibsource = {DBLP, http://dblp.uni-trier.de}
    }