# DouZero/douzero/evaluation/rlcard_agent.py

import random

from rlcard.games.doudizhu.utils import CARD_TYPE

# Environment card codes (ints) -> one-character card symbols used by the
# rule helpers below. Entries are listed from the lowest code to the highest.
EnvCard2RealCard = dict(zip(
    (3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 17, 20, 30),
    '3456789TJQKA2BR',
))
# Inverse lookup: card symbol -> environment card code.
RealCard2EnvCard = {symbol: code for code, symbol in EnvCard2RealCard.items()}

# Card symbol -> position (0..14) in the ordering above; used to index the
# 15-slot count vectors built by card_str2list.
INDEX = {symbol: position
         for position, symbol in enumerate(EnvCard2RealCard.values())}

class RLCardAgent(object):
    '''Rule-based Dou Dizhu player.

    When leading, it plays a combination (as computed by ``combine_cards``)
    that contains the first card of its hand. When following, it plays the
    legal action of the same type as the last move with the smallest rank
    value found in ``CARD_TYPE``. Whenever the rules yield nothing, or any
    step of the rule evaluation fails, it falls back to a uniformly random
    legal action.
    '''

    def __init__(self, position):
        '''Store the seat of this agent.

        Args:
            position: seat identifier; ``act`` compares it against the
                string ``'landlord'``.
        '''
        self.name = 'RLCard'
        self.position = position

    @staticmethod
    def _to_card_str(cards):
        '''Join environment card codes into a string of card symbols.

        Builds a new string and never modifies ``cards``.
        '''
        return ''.join(EnvCard2RealCard[c] for c in cards)

    @staticmethod
    def _to_env_cards(card_str):
        '''Turn a string of card symbols into a list of environment codes.'''
        return [RealCard2EnvCard[c] for c in card_str]

    def _lead_action(self, hand_cards):
        '''Pick the action to play when both previous moves were passes.

        Returns the combination containing the first hand card, or None when
        no combination contains it.
        '''
        comb = combine_cards(hand_cards)
        min_card = hand_cards[0]
        chosen_action = None
        # The scan deliberately does not stop at the first hit: when several
        # combinations contain the card, the last one in ``comb`` order wins.
        for acs in comb.values():
            for ac in acs:
                if min_card in ac:
                    chosen_action = ac
        if chosen_action is None:
            return None
        return self._to_env_cards(chosen_action)

    def _follow_action(self, infoset, last_move, last_pid):
        '''Pick the action to play in response to ``last_move``.

        Returns the same-type legal action with the smallest rank value, an
        empty list (pass) when nothing matches and neither the last player
        nor this agent is the landlord, or None when no rule applies.
        '''
        # The first CARD_TYPE[0] entry of a hand is read as (type, rank),
        # following the names used here -- TODO confirm against rlcard.
        the_type = CARD_TYPE[0][last_move][0][0]
        chosen_action = ''
        rank = 1000
        for ac in infoset.legal_actions:
            _ac = self._to_card_str(ac)
            # A lookup failure for any legal action is intentionally not
            # handled here: it aborts the rule and triggers the random
            # fallback in ``act``.
            if _ac != '' and the_type == CARD_TYPE[0][_ac][0][0]:
                candidate_rank = int(CARD_TYPE[0][_ac][0][1])
                if candidate_rank < rank:
                    rank = candidate_rank
                    chosen_action = _ac
        if chosen_action != '':
            return self._to_env_cards(chosen_action)
        if last_pid != 'landlord' and self.position != 'landlord':
            # Pass instead of contesting a move not made by the landlord.
            return []
        return None

    def _rule_action(self, infoset):
        '''Evaluate the hand-written rules; return an action or None.'''
        # Work on freshly built strings so the caller's infoset lists are
        # left untouched.
        hand_cards = self._to_card_str(infoset.player_hand_cards)
        last_move = self._to_card_str(infoset.last_move)
        last_two_cards = [self._to_card_str(infoset.last_two_moves[i])
                          for i in range(2)]
        last_pid = infoset.last_pid

        if last_two_cards[0] == '' and last_two_cards[1] == '':
            return self._lead_action(hand_cards)
        return self._follow_action(infoset, last_move, last_pid)

    def act(self, infoset):
        '''Choose an action for the current state.

        Args:
            infoset: object exposing ``player_hand_cards``, ``last_move``,
                ``last_two_moves`` (at least two entries), ``last_pid`` and
                ``legal_actions``; card sequences hold the integer codes
                that are keys of ``EnvCard2RealCard``. It is not modified.

        Returns:
            One element of ``infoset.legal_actions``.

        Raises:
            AssertionError: if the rule-selected action is not legal.
            IndexError: if ``infoset.legal_actions`` is empty and the random
                fallback is needed.
        '''
        try:
            action = self._rule_action(infoset)
        except Exception:
            # Best effort by design: any failure inside the rules (unknown
            # card, missing CARD_TYPE entry, empty hand, ...) degrades to a
            # random legal move. Interpreter-level signals such as
            # KeyboardInterrupt are no longer swallowed.
            action = None

        if action is None:
            action = random.choice(infoset.legal_actions)

        assert action in infoset.legal_actions

        return action

def card_str2list(hand):
    '''Count the cards in ``hand`` per symbol.

    Args:
        hand: iterable of card symbols that are keys of ``INDEX``.

    Returns:
        A new list of 15 ints where slot ``INDEX[symbol]`` holds how many
        times that symbol occurs in ``hand``.
    '''
    counts = [0] * 15
    for symbol in hand:
        slot = INDEX[symbol]
        counts[slot] = counts[slot] + 1
    return counts

def list2card_str(hand_list):
    '''Expand per-symbol counts back into a card string.

    Args:
        hand_list: sequence of counts; position ``i`` refers to the i-th
            symbol of ``INDEX`` in its iteration order.

    Returns:
        A string in which each symbol is repeated according to its count,
        in ``INDEX`` order.
    '''
    symbols = list(INDEX)
    pieces = [symbols[position] * amount
              for position, amount in enumerate(hand_list)]
    return ''.join(pieces)

def pick_chain(hand_list, count):
    '''Extract chains of ``count``-of-a-kind from a count vector.

    Only the first 12 slots take part, so the last three ``INDEX`` symbols
    ('2', 'B', 'R') never appear in a chain. A run is a maximal stretch of
    consecutive non-zero slots; runs shorter than 5 slots are ignored.
    For every remaining run, as many identical chains as the smallest count
    in the run allows (``smallest // count``) are taken out.

    Args:
        hand_list: per-symbol card counts as produced by ``card_str2list``.
            Not modified.
        count: cards per rank in a chain (1 for solo chains, 2 for pair
            chains).

    Returns:
        A tuple ``(chains, remaining)``: ``chains`` is a list of chain
        strings in which every rank of the run appears ``count`` times,
        ``remaining`` is a new count vector with the used cards removed.
    '''
    symbols = list(INDEX)
    remaining = [int(amount) for amount in hand_list]
    chains = []

    limit = min(12, len(remaining))
    start = 0
    while start < limit:
        if remaining[start] == 0:
            start += 1
            continue

        # Find the end (exclusive) of the run of non-zero counts.
        end = start
        while end < limit and remaining[end] != 0:
            end += 1

        # NOTE: the 5-slot minimum is applied for every ``count``; it is
        # kept as-is so the set of runs considered does not change.
        if end - start >= 5:
            copies = min(remaining[start:end]) // count
            if copies != 0:
                # Each rank contributes ``count`` cards to the chain, and
                # only the cards actually used are taken from the hand.
                chain = ''.join(symbols[slot] * count
                                for slot in range(start, end))
                used = copies * count
                for slot in range(start, end):
                    remaining[slot] -= used
                chains.extend([chain] * copies)
        start = end

    return (chains, remaining)

def combine_cards(hand):
    '''Greedily split a hand into playable combinations.

    Combinations are taken out in a fixed order: rocket, bombs, trios
    (adjacent trios are merged into trio chains), solo chains, pair chains,
    and finally pairs and solos from whatever is left.

    Args:
        hand: string of card symbols. The checks compare characters at fixed
            offsets (e.g. the rocket test looks at the last two characters),
            so the hand is presumably ordered by rank -- verify at callers.

    Returns:
        A dict with the keys 'rocket', 'bomb', 'trio', 'trio_chain',
        'solo_chain', 'pair_chain', 'pair' and 'solo', each mapping to a
        list of card strings.
    '''
    comb = {'rocket': [], 'bomb': [], 'trio': [], 'trio_chain': [],
            'solo_chain': [], 'pair_chain': [], 'pair': [], 'solo': []}

    # Rocket: both jokers sitting at the end of the hand.
    if hand[-2:] == 'BR':
        comb['rocket'].append('BR')
        hand = hand[:-2]

    # Bombs: scan a snapshot while removing matches from the live hand.
    snapshot = hand
    for pos in range(len(snapshot) - 3):
        if snapshot[pos] == snapshot[pos + 3]:
            quad = snapshot[pos:pos + 4]
            comb['bomb'].append(quad)
            hand = hand.replace(quad, '')

    # Trios: a trio whose rank directly follows the previous group extends
    # that group, unless the trio's rank index is 12 or above.
    snapshot = hand
    groups = []
    for pos in range(len(snapshot) - 2):
        if snapshot[pos] != snapshot[pos + 2]:
            continue
        triple = snapshot[pos:pos + 3]
        if (groups
                and INDEX[triple[-1]] < 12
                and INDEX[triple[-1]] - 1 == INDEX[groups[-1][-1]]):
            groups[-1] += triple
        else:
            groups.append(triple)
        hand = hand.replace(triple, '')
    # Groups of exactly three cards are plain trios, longer ones are chains.
    comb['trio'] = [group for group in groups if len(group) == 3]
    comb['trio_chain'] = [group for group in groups if len(group) != 3]

    # Solo chains first, then pair chains, on the count-vector form.
    counts = card_str2list(hand)
    comb['solo_chain'], counts = pick_chain(counts, 1)
    comb['pair_chain'], counts = pick_chain(counts, 2)
    hand = list2card_str(counts)

    # Leftovers: two equal neighbours form a pair, anything else is a solo.
    pos = 0
    last = len(hand) - 1
    while pos < last:
        if hand[pos] == hand[pos + 1]:
            comb['pair'].append(hand[pos] * 2)
            pos += 2
        else:
            comb['solo'].append(hand[pos])
            pos += 1
    if pos == last:
        # One unpaired card remains at the very end.
        comb['solo'].append(hand[pos])
    return comb