% Publications on code refactoring, version control operations and software
% maintainability. One entry per work, one field per line.
% Conventions used below:
%   - authors are written as "Last, First" and separated by " and ";
%   - month uses the predefined three-letter macros, unbraced;
%   - page ranges use a double hyphen;
%   - conference papers use the inproceedings entry type;
%   - the address field of conference papers holds the value exported by the
%     source system (it looks like the conference venue -- confirm if the
%     publisher city is wanted instead).

% Journal article, Information and Software Technology, volume 95.
% NOTE(review): the note still says "Accepted, to appear." although volume and
% year are set, and the chapter field holds 313 -- presumably the start page
% written by the exporting system. Confirm the page range and replace both.
@article{HKF17,
  author   = {Heged{\H u}s, P and K{\'a}d{\'a}r, Istv{\'a}n and Ferenc, Rudolf and Gyim{\'o}thy, Tibor},
  title    = {Empirical Evaluation of Software Maintainability Based on a Manually Validated Refactoring Dataset},
  journal  = {Information and Software Technology},
  volume   = {95},
  chapter  = {313},
  year     = {2018},
  month    = nov,
  note     = {Accepted, to appear.},
  issn     = {0950-5849},
  doi      = {10.1016/j.infsof.2017.11.012},
  url      = {http://www.sciencedirect.com/science/article/pii/S0950584916303561},
  keywords = {Code refactoring, Empirical study, Manually validated empirical dataset, Software maintainability, Source code metrics},
  abstract = {Context: Refactoring is a technique for improving the internal structure of software systems. It has a solid theoretical background while being used in development practice also. However, we lack empirical research results on the real effect of code refactoring and its application. Objective: This paper presents a manually validated subset of a previously published dataset containing the refactorings extracted by the RefFinder tool, code metrics, and maintainability of 7 open-source systems. We found that RefFinder had around 27\% overall average precision on the subject systems, thus our manually validated subset has substantial added value. Using the dataset, we studied several aspects of the refactored and non-refactored source code elements (classes and methods), like the differences in their maintainability and source code metrics. Method: We divided the source code elements into a group containing the refactored elements and a group with non-refactored elements. We analyzed the elements' characteristics in these groups using correlation analysis, Mann-Whitney U test and effect size measures. Results: Source code elements subjected to refactorings had significantly lower maintainability than elements not affected by refactorings. Moreover, refactored elements had significantly higher size related metrics, complexity, and coupling. Also these metrics changed more significantly in the refactored elements. The results are mostly in line with our previous findings on the not validated dataset, with the difference that clone metrics had no strong connection with refactoring. Conclusions: Compared to the preliminary analysis using a not validated dataset, the manually validated dataset led to more significant results, which suggests that developers find targets for refactorings based on some internal quality properties of the source code, like their size, complexity or coupling, but not clone related metrics as reported in our previous studies. They do not just use these properties for identifying targets, but also control them with refactorings.},
}

% Conference paper, ICCSA 2016 (method-level assessment of the refactoring dataset).
@inproceedings{KHF16c,
  author    = {K{\'a}d{\'a}r, Istv{\'a}n and Heged{\H u}s, P and Ferenc, Rudolf and Gyim{\'o}thy, Tibor},
  title     = {Assessment of the Code Refactoring Dataset Regarding the Maintainability of Methods},
  booktitle = {Proceedings of the 16th International Conference on Computational Science and Its Applications (ICCSA 2016)},
  pages     = {610--624},
  publisher = {Springer International Publishing},
  address   = {Beijing, China},
  year      = {2016},
  month     = jul,
  doi       = {10.1007/978-3-319-42089-9_43},
  url       = {https://link.springer.com/chapter/10.1007/978-3-319-42089-9_43},
  keywords  = {Code refactoring, Empirical study, Refactoring dataset, Software maintainability},
  abstract  = {Code refactoring has a solid theoretical background while being used in development practice at the same time. However, previous works found controversial results on the nature of code refactoring activities in practice. Both their application context and impact on code quality needs further examination. Our paper encourages the investigation of code refactorings in practice by providing an excessive open dataset of source code metrics and applied refactorings through several releases of 7 open-source systems. We already demonstrated the practical value of the dataset by analyzing the quality attributes of the refactored source code classes and the values of source code metrics improved by those refactorings. In this paper, we have gone one step deeper and explored the effect of code refactorings at the level of methods. We found that similarly to class level, lower maintainability indeed triggers more code refactorings in practice at the level of methods and these refactorings significantly decrease size, coupling and clone metrics.},
}

% Conference paper, SANER 2016 (the original refactoring dataset).
@inproceedings{KHF16,
  author    = {K{\'a}d{\'a}r, Istv{\'a}n and Heged{\H u}s, P and Ferenc, Rudolf and Gyim{\'o}thy, Tibor},
  title     = {A Code Refactoring Dataset and Its Assessment Regarding Software Maintainability},
  booktitle = {Proceedings of the 23rd IEEE International Conference on Software Analysis, Evolution, and Reengineering (SANER 2016)},
  pages     = {599--603},
  publisher = {IEEE Computer Society},
  address   = {Suita, Osaka, Japan},
  year      = {2016},
  doi       = {10.1109/SANER.2016.42},
  url       = {http://ieeexplore.ieee.org/document/7476680/},
  keywords  = {Code refactoring, Empirical study, Software maintainability},
  abstract  = {It is very common in various fields that there is a gap between theoretical results and their practical applications. This is true for code refactoring as well, which has a solid theoretical background while being used in development practice at the same time. However, more and more studies suggest that developers perform code refactoring entirely differently than the theory would suggest. Our paper encourages the further investigation of code refactorings in practice by providing an excessive open dataset of source code metrics and applied refactorings through several releases of 7 open-source systems. As a first step of processing this dataset, we examined the quality attributes of the refactored source code classes and the values of source code metrics improved by those refactorings. Our early results show that lower maintainability indeed triggers more code refactorings in practice and these refactorings significantly decrease complexity, code lines, coupling and clone metrics. However, we observed a decrease in comment related metrics in the refactored code.},
}

% Conference paper, PROMISE 2016 (the manually validated dataset).
% The pages field uses the article-number form exported by the source system.
@inproceedings{KHF16b,
  author    = {K{\'a}d{\'a}r, Istv{\'a}n and Heged{\H u}s, P and Ferenc, Rudolf and Gyim{\'o}thy, Tibor},
  title     = {A Manually Validated Code Refactoring Dataset and Its Assessment Regarding Software Maintainability},
  booktitle = {Proceedings of the 12th ACM International Conference on Predictive Models and Data Analytics in Software Engineering (PROMISE 2016)},
  pages     = {10:1--10:4},
  publisher = {ACM},
  address   = {Ciudad Real, Spain},
  year      = {2016},
  month     = sep,
  doi       = {10.1145/2972958.2972962},
  url       = {https://dl.acm.org/citation.cfm?doid=2972958.2972962},
  keywords  = {Code refactoring, Empirical study, Manually validated empirical dataset, Software maintainability},
  abstract  = {Refactoring is a popular technique for improving the internal structure of software systems. It has a solid theoretical background while being used in development practice at the same time. However, we lack empirical research results on the real effect of code refactoring and its ways of application. This paper presents a manually validated dataset of applied refactorings and source code metrics and maintainability of 7 open-source systems. It is a subset of our previously published dataset containing the refactoring instances automatically extracted by the RefFinder tool. We found that RefFinder had around 27\% overall average precision on the subject systems, thus our new -- manually validated -- subset has substantial added value allowing researchers to perform more accurate empirical investigations. Using this data, we were able to study whether refactorings were really triggered by poor maintainability of the code, or by other aspects. The results show that source code elements subject to refactorings had significantly lower maintainability values (approximated by source code metric aggregation) than elements not affected by refactorings between two releases.},
}

% Conference paper, ICSME 2015 (industrial case study on automatic refactorings).
% "An" is brace-protected so sentence-casing styles keep the capital after "?".
@inproceedings{SNH15,
  author    = {Sz{\H o}ke, G{\'a}bor and Nagy, Csaba and Heged{\H u}s, P and Ferenc, Rudolf and Gyim{\'o}thy, Tibor},
  title     = {Do Automatic Refactorings Improve Maintainability? {An} Industrial Case Study},
  booktitle = {Proceedings of the 31st International Conference on Software Maintenance and Evolution (ICSME 2015)},
  pages     = {429--438},
  publisher = {IEEE Computer Society},
  address   = {Bremen, Germany},
  year      = {2015},
  doi       = {10.1109/ICSM.2015.7332494},
  url       = {http://ieeexplore.ieee.org/document/7332494/},
  keywords  = {automatic refactoring, coding issues, ISO/IEC 25010, Software maintainability},
  abstract  = {Refactoring is often treated as the main remedy against the unavoidable code erosion happening during software evolution. Studies show that refactoring is indeed an elemental part of the developers' arsenal. However, empirical studies about the impact of refactorings on software maintainability still did not reach a consensus. Moreover, most of these empirical investigations are carried out on open-source projects where distinguishing refactoring operations from other development activities is a challenge in itself. We had a chance to work together with several software development companies in a project where they got extra budget to improve their source code by performing refactoring operations. Taking advantage of this controlled environment, we collected a large amount of data during a refactoring phase where the developers used a (semi)automatic refactoring tool. By measuring the maintainability of the involved subject systems before and after the refactorings, we got valuable insights into the effect of these refactorings on large-scale industrial projects. All but one company, who applied a special refactoring strategy, achieved a maintainability improvement at the end of the refactoring phase, but even that one company suffered from the negative impact of only one type of refactoring.},
}

% Journal article, Acta Cybernetica 21(4).
@article{FHV14,
  author   = {Farag{\'o}, Csaba and Heged{\H u}s, P and V{\'e}gh, {\'A}d{\'a}m Zolt{\'a}n and Ferenc, Rudolf},
  title    = {Connection Between Version Control Operations and Quality Change of the Source Code},
  journal  = {Acta Cybernetica},
  volume   = {21},
  number   = {4},
  pages    = {585--607},
  year     = {2014},
  issn     = {0324-721X},
  url      = {http://publicatio.bibl.u-szeged.hu/8394/},
  keywords = {Case Study, ISO/IEC 9126, Software Erosion, Software maintainability, Source Code Version Control},
  abstract = {Software erosion is a well-known phenomena, meaning that software quality is continuously decreasing due to the ever-ongoing modifications in the source code. In this research work we investigated this phenomena by studying the impact of version control commit operations (add, update, delete) on the quality of the code. We calculated the ISO/IEC 9126 quality attributes for thousands of revisions of an industrial and three open-source software systems with the help of the Columbus Quality Model. We also collected the cardinality of each version control operation type for every investigated revision. We performed Chisquared tests on contingency tables with rows of quality change and columns of version control operation commit types. We compared the results with random data as well. We identified that the relationship between the version control operations and quality change is quite strong. Great maintainability improvements are mostly caused by commits containing Add operation. Commits containing file updates only tend to have a negative impact on the quality. Deletions have a weak connection with quality, and we could not formulate a general statement.},
}

% Workshop presentation (the note field says "Presentation only"); no pages,
% publisher, DOI or URL are recorded for it.
@inproceedings{LHF14,
  author    = {Lad{\'a}nyi, Gergely and Heged{\H u}s, P and Ferenc, Rudolf and Siket, Istv{\'a}n and Gyim{\'o}thy, Tibor},
  title     = {The Connection of the Bug Density and Maintainability of Classes},
  booktitle = {8th International Workshop on Software Quality and Maintainability},
  series    = {SQM},
  address   = {Antwerp, Belgium},
  year      = {2014},
  month     = feb,
  note      = {Presentation only},
  keywords  = {Bug density, Bug prediction, Class level maintainability, ColumbusQM, ISO/IEC 25010, Software maintainability},
  abstract  = {Measuring software product maintainability is a central issue in software engineering which led to a number of different practical quality models. Besides system level assessments it is also desirable that these models provide technical quality information at source code element level (e.g. classes, methods) to aid the improvement of the software. Although many existing models give an ordered list of source code elements that should be improved, it is unclear how these improvements affect other important quality indicators of the system, e.g. bug density. In this paper we empirically investigate the connection of bug numbers in the classes of different open-source systems and the class level maintainability measures of our ColumbusQM probabilistic quality model using open-access bug datasets. We show that classes with lower maintainability score contain significantly more bugs than more maintainable classes. Moreover, in terms of correctness and completeness, ColumbusQM competes with statistical and machine learning prediction models especially trained on the bug data using product metrics as predictors. This is a great achievement in the light of that our model needs no training and its purpose is different (e.g. to estimate testability, or development costs) than those of the bug prediction models. In summary, we show that improving the maintainability of a software according to the suggestions of our quality model will expectedly also reduce its bug density.},
}

% Conference paper, ICCSA 2014, published in the LNCS series (volume 8583).
@inproceedings{FHF14,
  author    = {Farag{\'o}, Csaba and Heged{\H u}s, P and Ferenc, Rudolf},
  title     = {The Impact of Version Control Operations on the Quality Change of the Source Code},
  booktitle = {Proceedings of the 14th International Conference on Computational Science and Its Applications (ICCSA 2014)},
  series    = {Lecture Notes in Computer Science (LNCS)},
  volume    = {8583},
  pages     = {353--369},
  publisher = {Springer-Verlag},
  address   = {Guimar{\~a}es, Portugal},
  year      = {2014},
  month     = jun,
  doi       = {10.1007/978-3-319-09156-3_26},
  url       = {https://link.springer.com/chapter/10.1007/978-3-319-09156-3_26},
  keywords  = {Case Study, ISO/IEC 9126, Software Erosion, Software maintainability, Source Code Version Control},
  abstract  = {The number of software systems under development and maintenance is rapidly increasing. The quality of a system's source code tends to decrease during its lifetime which is a problem because maintaining low quality code consumes a big portion of the available efforts. In this research we investigated one aspect of code change, the version control commit operations (add, update, delete). We studied the impact of these operations on the maintainability of the code. We calculated the ISO/IEC 9126 quality attributes for thousands of revisions of an industrial and three open-source software systems. We also collected the cardinality of each version control operation type for every investigated revision. Based on these data, we identified that operation Add has a rather positive, while operation Update has a rather negative effect on the quality. On the other hand, for operation Delete we could not find a clear connection to quality change.},
}