% HKF17 -- journal article, Information and Software Technology, vol. 95 (2018).
% Cleaned up from an auto-exported record:
%   * the start page had been exported in the "chapter" field, which article
%     styles ignore; it now lives in "pages" as a full range
%   * "month" uses the bare three-letter macro and reflects the issue date
%     rather than the online-first month
%   * the stale "Accepted, to appear." note was dropped now that volume and
%     year are final
%   * author names are uniformly "Last, First" with full given names
% NOTE(review): page range, issue month and the first author's given name were
% filled in from the published record for the DOI below -- confirm against it.
@article{HKF17,
  author   = {Heged{\H{u}}s, P{\'e}ter and K{\'a}d{\'a}r, Istv{\'a}n and Ferenc, Rudolf and Gyim{\'o}thy, Tibor},
  title    = {Empirical Evaluation of Software Maintainability Based on a Manually Validated Refactoring Dataset},
  journal  = {Information and Software Technology},
  volume   = {95},
  pages    = {313--327},
  year     = {2018},
  month    = mar,
  issn     = {0950-5849},
  doi      = {10.1016/j.infsof.2017.11.012},
  url      = {http://www.sciencedirect.com/science/article/pii/S0950584916303561},
  keywords = {Code refactoring, Empirical study, Manually validated empirical dataset, Software maintainability, Source code metrics},
  abstract = {Context: Refactoring is a technique for improving the internal structure of software systems. It has a solid theoretical background while being used in development practice also. However, we lack empirical research results on the real effect of code refactoring and its application. Objective: This paper presents a manually validated subset of a previously published dataset containing the refactorings extracted by the RefFinder tool, code metrics, and maintainability of 7 open-source systems. We found that RefFinder had around 27\% overall average precision on the subject systems, thus our manually validated subset has substantial added value. Using the dataset, we studied several aspects of the refactored and non-refactored source code elements (classes and methods), like the differences in their maintainability and source code metrics. Method: We divided the source code elements into a group containing the refactored elements and a group with non-refactored elements. We analyzed the elements{\textquoteright} characteristics in these groups using correlation analysis, Mann-Whitney U test and effect size measures. Results: Source code elements subjected to refactorings had significantly lower maintainability than elements not affected by refactorings. Moreover, refactored elements had significantly higher size related metrics, complexity, and coupling. Also these metrics changed more significantly in the refactored elements. The results are mostly in line with our previous findings on the not validated dataset, with the difference that clone metrics had no strong connection with refactoring. 
Conclusions: Compared to the preliminary analysis using a not validated dataset, the manually validated dataset led to more significant results, which suggests that developers find targets for refactorings based on some internal quality properties of the source code, like their size, complexity or coupling, but not clone related metrics as reported in our previous studies. They do not just use these properties for identifying targets, but also control them with refactorings.},
}