summaryrefslogtreecommitdiffstats
path: root/lib
diff options
context:
space:
mode:
authorKarl 'vollmerk' Vollmer <vollmer@ampache.org>2006-02-13 08:12:47 +0000
committerKarl 'vollmerk' Vollmer <vollmer@ampache.org>2006-02-13 08:12:47 +0000
commit79b8e30e4e5b31ac2b21707b3e73d9ae485a7d3e (patch)
tree6c1a88e3e571ec767ab3cd147ee86fe576b07742 /lib
parent48e42c1b8cd28384dd7e52de8f0a4512e5dc3b4c (diff)
downloadampache-79b8e30e4e5b31ac2b21707b3e73d9ae485a7d3e.tar.gz
ampache-79b8e30e4e5b31ac2b21707b3e73d9ae485a7d3e.tar.bz2
ampache-79b8e30e4e5b31ac2b21707b3e73d9ae485a7d3e.zip
added loose name compare and improved rename
Diffstat (limited to 'lib')
-rw-r--r--lib/class/artist.class.php139
-rw-r--r--lib/duplicates.php2
-rw-r--r--lib/ui.lib.php2
3 files changed, 131 insertions, 12 deletions
diff --git a/lib/class/artist.class.php b/lib/class/artist.class.php
index 1b96b0aa..334472e3 100644
--- a/lib/class/artist.class.php
+++ b/lib/class/artist.class.php
@@ -205,7 +205,7 @@ class Artist {
@param $newname the artist's new name, either a new
artist will be created or songs added to existing
artist if name exists already
- @return the id of the new artist
+ @return the id of the new artist, or false if an error
*/
function rename($newname) {
@@ -221,11 +221,12 @@ class Artist {
/* check that it wasn't just whitespace that we were called to change */
if ($newid == $this->id) {
$GLOBALS['error']->add_error('artist_name',_("Error: Name Identical"));
- return $newid;
+ return false;
}
/* now we can just call merge */
- $this->merge($newid);
+ if (!$this->merge($newid))
+ return false;
//now return id
return $newid;
@@ -237,6 +238,7 @@ class Artist {
@discussion changes the artist id of all songs by this artist
to the given id and deletes self from db
@param $newid the new artist id that this artist's songs should have
+ @return the name of the new artist on success, false if error
*/
function merge($newid) {
@@ -249,15 +251,14 @@ class Artist {
}
// First check newid exists
- $check_exists_qstring = "SELECT name FROM artist WHERE id='" . sql_escape($newid) . "'";
+ $check_exists_qstring = "SELECT name FROM artist WHERE id='" . $newid . "'"; //no need to escape newid, it's numeric
$check_exists_query = mysql_query($check_exists_qstring, dbh());
- if ($check_exists_results = mysql_fetch_assoc($check_exists_query)) {
-
+ if ($check_exists_result = mysql_fetch_assoc($check_exists_query)) {
$NewName = $check_exists_result['name'];
// Now the query
- $sql = "UPDATE song SET artist='" . sql_escape($newid) . "' " .
+ $sql = "UPDATE song SET artist='" . $newid . "' " .
"WHERE artist='" . sql_escape($this->id) . "'";
$db_results = mysql_query($sql, dbh());
@@ -266,9 +267,11 @@ class Artist {
/* If we've done the merege we need to clean up */
$catalog->clean_artists();
$catalog->clean_albums();
+
+ return $NewName;
}
else {
- $GLOBALS['error']->add_error('general',"Error: Invalid Artist ID");
+ $GLOBALS['error']->add_error('general',"Error: No such artist to merge with");
return false;
}
} // merge
@@ -290,7 +293,125 @@ class Artist {
require (conf('prefix') . "/templates/show_artist.inc");
} // show_albums
-
+
+ /*!
+ @function get_similar_artists
+ @discussion returns an array of artist (id,name) arrays that are similar in name
+ All whitespace and special chars are ignored
+ @param extra arguments to normalize and compre, in that order
+ @return array of artist, each element is (id,name)
+ */
+ function get_similar_artists ($n_rep_uml,$n_filter,$n_ignore,$c_mode,$c_count_w,$c_percent_w,$c_distance_l) {
+ //strip out just about everything, including whitespace, numbers and weird chars, and then
+ //lowercase it
+ $name = $this->normalize_name($this->name,$n_rep_uml,$n_filter,$n_ignore);
+
+ //now for a bit of mysql query
+ $sql = "SELECT id, name FROM artist WHERE id != '" . sql_escape($this->id) . "'";
+ $query = mysql_query($sql, dbh());
+ //loop it
+ $similar_artists = array();
+ while ($r = mysql_fetch_assoc($query)) {
+ $artist_name = $this->normalize_name($r['name'],$n_rep_uml,$n_filter,$n_ignore);
+ //echo "'" . $r['name'] . "' => '" . $artist_name . "'<br/>\n";
+ if ($this->compare_loose($name,$artist_name,$c_mode,$c_count_w,$c_percent_w,$c_distance_l)) {
+ //echo "***MATCH***<br/>\n";
+ $similar_artists[] = array($r['id'],$r['name']);
+ }
+ }
+ return $similar_artists;
+ } // get_similar_artists
+
+
+ /*!
+ @function normalize_name
+ @param artist name to normalize
+ @param $replace_umlaut wether to replace umlauts and others with the plain letter, default true
+ @param $filter what to filter out, defulat /[^a-z ]/
+ @param $ignore terms to ignore, default /\s(the|an?)\s/ (name is padded with whitespace beforehand)
+ @returns the normalized version of the given artist name, containing only letters and single spaces
+ */
+ function normalize_name ($name,$replace_umlaut = NULL, $filter = NULL, $ignore = NULL) {
+ if (is_null($replace_umlaut)) $replace_umlaut = true;
+ if (is_null($filter)) $filter = "/[^a-z ]/";
+ if (is_null($ignore)) $ignore = "/\s(the|an?)\s/";
+ if ($replace_umlaut) {
+ //convert ümlauts, idea from http://php.net/manual/en/function.str-replace.php#50081
+ $umlauts = array("uml","acute","grave","cedil","ring","circ","tilde","lig","slash");
+ $name = str_replace($umlauts,"",htmlentities($name));
+ //now replace all &.; with .
+ $name = preg_replace("/&(.);/","\$1",$name);
+ //back to normal
+ $name = html_entity_decode($name);
+ }
+ //lowercase
+ $name = strtolower($name);
+ //now rip out all the special chars and spaces
+ $name = preg_replace($filter,"",$name);
+ //now certains terms can be dropped completely
+ //we have to add spaces on the sides though
+ $name = " " . $name . " ";
+ $name = preg_replace($ignore,"",$name);
+ //now single spaces
+ $name = preg_replace("/\s{2,}/"," ",$name);
+ //return
+ return trim($name);
+ } //normalize_name
+
+ /*!
+ @function compare_loose
+ @discussion percent and count are ORed together
+ @param $name1 artist name
+ @param $name2 artist name to compare against
+ @param $mode the type of matching to perform, one of line or word, default word
+ @param $countwords WORD MODE number of words that must be shared to match, 0 to disable, default 0
+ @param $percentwords WORD MODE percentage of words that must be shared to match, 0 to disable, default 50%
+ @param $distance LETTER MODE max levenshtein distance to pass as a match
+ @return true if given params are similar, false if not
+ */
+ function compare_loose ($name1,$name2,$mode = NULL,$countwords = NULL,$percentwords = NULL,$distance = NULL) {
+ if (is_null($mode)) $mode = "word";
+ if (is_null($countwords)) $countwords = 0;
+ if (is_null($percentwords)) $percentwords = 50;
+ if (is_null($distance)) $distance = 2;
+
+ //echo "Compare '$name1' vs. '$name2'<br/>\n";
+
+ $modes = array("line" => 0,"word" => 0,"letter" => 0);
+ $mode = (isset($modes[$mode]) ? $mode : "word");
+ switch ($mode) {
+ case "line":
+ //this is still relevant because of the normalize
+ return $name1 == $name2;
+ break;
+ case "word":
+ //echo " COMPARE: Word mode<br/>\n";
+ //first, count the number of terms in name1, and then the number that also appear in name2
+ $words = explode(" ",$name1);
+ $num_words = count($words);
+ $num_words_shared = 0;
+ foreach ($words as $word) {
+ //echo " Looking for word '$word'... ";
+ if (strpos($name2,$word) !== false) {
+ //echo "MATCHED";
+ $num_words_shared++;
+ } else {
+ //echo " Nope";
+ }
+ //echo "<br/>\n";
+ }
+ //now make the descision
+ return (
+ ($countwords > 0 && $num_words_shared >= $countwords) ||
+ ($percentwords > 0 && $num_words_shared > 0 && $num_words_shared/$num_words >= $percentwords/100)
+ );
+ break;
+ case "letter":
+ //simple
+ return levenshtein($name1,$name2) <= $distance;
+ break;
+ }
+ } //compare_loose
} //end of artist class
diff --git a/lib/duplicates.php b/lib/duplicates.php
index 75ca2a22..1d03a3da 100644
--- a/lib/duplicates.php
+++ b/lib/duplicates.php
@@ -42,8 +42,6 @@ function get_duplicate_songs($search_type) {
$sql = $sql." HAVING count(title) > 1";
$sql = $sql." ORDER BY ctitle";
- //echo $sql."<BR>";
-
$result = mysql_query($sql, dbh());
$arr = array();
diff --git a/lib/ui.lib.php b/lib/ui.lib.php
index 44a67710..128ba533 100644
--- a/lib/ui.lib.php
+++ b/lib/ui.lib.php
@@ -880,7 +880,7 @@ function show_artist_pulldown ($artist_id,$select_name='artist') {
$artist->get_count();
if ( $artist_id == $r['id'] ) {
- echo "\t<option value=\"" . $artist->id . "\" selected=\"selected\">". scrub_out($artist->name) . " (" . $artist->songs . ")</option>\n";
+ echo "\t<option value=\"" . $artist->id . "\" selected=\"selected\">". scrub_out($artist->name) . "</option>\n";
}
else {
echo "\t<option value=\"" . $artist->id . "\">". scrub_out($artist->name) ."</option>\n";