% ICCSA 2016 conference paper: a public bug database built from GitHub projects.
@inproceedings{TGF16,
  author    = {T{\'o}th, Zolt{\'a}n and Gyimesi, P{\'e}ter and Ferenc, Rudolf},
  title     = {A Public Bug Database of {GitHub} Projects and Its Application in Bug Prediction},
  booktitle = {Proceedings of the 16th International Conference on Computational Science and Its Applications ({ICCSA} 2016)},
  year      = {2016},
  pages     = {625--638},
  publisher = {Springer International Publishing},
  address   = {Beijing, China},
  doi       = {10.1007/978-3-319-42089-9_44},
  url       = {https://link.springer.com/chapter/10.1007/978-3-319-42089-9_44},
  keywords  = {Bug database, Bug prediction},
  abstract  = {Detecting defects in software systems is an evergreen topic, since there is no real world software without bugs. Many different bug locating algorithms have been presented recently that can help to detect hidden and newly occurred bugs in software. Papers trying to predict the faulty source code elements or code segments in the system always use experience from the past. In most of the cases these studies construct a database for their own purposes and do not make the gathered data publicly available. Public datasets are rare; however, a well constructed dataset could serve as a benchmark test input. Furthermore, open-source software development is rapidly increasing that also gives an opportunity to work with public data. In this study we selected 15 Java projects from GitHub to construct a public bug database from. We matched the already known and fixed bugs with the corresponding source code elements (classes and files) and calculated a wide set of product metrics on these elements. After creating the desired bug database, we investigated whether the built database is usable for bug prediction. We used 13 machine learning algorithms to address this research question and finally we achieved F-measure values between 0.7 and 0.8. Beside the F-measure values we calculated the bug coverage ratio on every project for every machine learning algorithm. We obtained very high and promising bug coverage values (up to 100\%).},
}

% SQM 2014 workshop contribution (presentation only): bug density versus class-level maintainability.
@inproceedings{LHF14,
  author    = {Lad{\'a}nyi, Gergely and Heged{\H u}s, P. and Ferenc, Rudolf and Siket, Istv{\'a}n and Gyim{\'o}thy, Tibor},
  title     = {The Connection of the Bug Density and Maintainability of Classes},
  booktitle = {8th International Workshop on Software Quality and Maintainability},
  series    = {SQM},
  year      = {2014},
  month     = feb,
  address   = {Antwerp, Belgium},
  note      = {Presentation only},
  keywords  = {Bug density, Bug prediction, Class level maintainability, ColumbusQM, ISO/IEC 25010, Software maintainability},
  abstract  = {Measuring software product maintainability is a central issue in software engineering which led to a number of different practical quality models. Besides system level assessments it is also desirable that these models provide technical quality information at source code element level (e.g. classes, methods) to aid the improvement of the software. Although many existing models give an ordered list of source code elements that should be improved, it is unclear how these improvements affect other important quality indicators of the system, e.g. bug density. In this paper we empirically investigate the connection of bug numbers in the classes of different open-source systems and the class level maintainability measures of our ColumbusQM probabilistic quality model using open-access bug datasets. We show that classes with lower maintainability score contain significantly more bugs than more maintainable classes. Moreover, in terms of correctness and completeness, ColumbusQM competes with statistical and machine learning prediction models especially trained on the bug data using product metrics as predictors. This is a great achievement in the light of that our model needs no training and its purpose is different (e.g. to estimate testability, or development costs) than those of the bug prediction models. In summary, we show that improving the maintainability of a software according to the suggestions of our quality model will expectedly also reduce its bug density.},
}