Ćwiczenie 4: Odkrywanie reguł asocjacyjnych

W tym ćwiczeniu zapoznajemy się z metodami odkrywania reguł asocjacyjnych. Analizujemy własności algorytów Apriori i FP-Growth i sprawdzamy, jak parametry minimalnego wsparcia (minsup) i minimalnej ufności (minconf) wpływają na uzyskiwane wyniki.

literatura
Rapid Miner

Oracle Data Mining
SET PAGESIZE 1000
 
CREATE OR REPLACE VIEW MARKET_BASKET_DATA AS 
SELECT cust_id, prod_name, 1 has_it 
FROM (SELECT a.cust_id, b.prod_name 
      FROM SH.SALES a, SH.PRODUCTS b 
      WHERE a.prod_id = b.prod_id)
WHERE cust_id BETWEEN 100000 AND 100500
GROUP BY cust_id, prod_name ;
 
CREATE OR REPLACE VIEW MARKET_BASKET_DATA_AR AS 
SELECT D.CUST_ID, CAST(MULTISET( SELECT DM_Nested_Numerical(SUBSTR(C.PROD_NAME, 1, 30), has_it) 
                                 FROM MARKET_BASKET_DATA C 
                                 WHERE C.CUST_ID = D.CUST_ID ) AS DM_Nested_Numericals) CUSTPRODUS 
FROM MARKET_BASKET_DATA D
GROUP BY D.CUST_ID;
SELECT * FROM MARKET_BASKET_DATA WHERE cust_id = 100001;
 
SELECT * FROM MARKET_BASKET_DATA_AR WHERE cust_id = 100001;
BEGIN
  DBMS_DATA_MINING.DROP_MODEL('Associations');
END;
DROP TABLE settings;
 
CREATE TABLE settings (
  setting_name VARCHAR2(30),
  setting_value VARCHAR2(128) 
);
BEGIN
  INSERT INTO settings VALUES (dbms_data_mining.asso_max_rule_length,3);
  INSERT INTO settings VALUES (dbms_data_mining.asso_min_support,0.1);
  INSERT INTO settings VALUES (dbms_data_mining.asso_min_confidence,0.5);
  COMMIT; 
END;
BEGIN
  DBMS_DATA_MINING.CREATE_MODEL(
    model_name          => 'Associations',
    mining_function     => DBMS_DATA_MINING.ASSOCIATION,
    data_table_name     => 'market_basket_data_ar',
    case_id_column_name => 'cust_id',
    settings_table_name => 'settings' );
END;
SELECT * FROM TABLE (
  DBMS_DATA_MINING.GET_MODEL_SETTINGS('Associations'))
ORDER BY setting_name;
SELECT t.itemset_id, i.attribute_subname AS item, t.support, t.number_of_items
FROM TABLE (DBMS_DATA_MINING.GET_FREQUENT_ITEMSETS('Associations')) t, TABLE (t.items) i
GROUP BY t.itemset_id, i.attribute_subname, t.support, t.number_of_items
ORDER BY t.itemset_id, i.attribute_subname, t.support, t.number_of_items;
 
SELECT t.rule_id, t.rule_support, t.rule_confidence, a.attribute_subname AS antecedent, c.attribute_subname AS consequent
FROM TABLE (DBMS_DATA_MINING.GET_ASSOCIATION_RULES('Associations')) t, 
     TABLE (t.antecedent) a, TABLE (t.consequent) c
ORDER BY t.rule_id, t.rule_support DESC, t.rule_confidence;